From ddcd945e556e2cc6be8f88ef0271b56927ffbe98 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Sun, 11 Oct 2020 08:54:38 -0700 Subject: rtw88: fix fw_fifo_addr check The clang build reports this warning fw.c:1485:21: warning: address of array 'rtwdev->chip->fw_fifo_addr' will always evaluate to 'true' if (!rtwdev->chip->fw_fifo_addr) { fw_fifo_addr is an array in rtw_chip_info so it is always nonzero. A better check is if the first element of the array is nonzero. In the cases where fw_fifo_addr is initialized by rtw88b and rtw88c, the first array element is 0x780. Fixes: 0fbc2f0f34cc ("rtw88: add dump firmware fifo support") Signed-off-by: Tom Rix Reviewed-by: Nathan Chancellor Acked-by: Tzu-En Huang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201011155438.15892-1-trix@redhat.com --- drivers/net/wireless/realtek/rtw88/fw.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c index 042015bc8055..b2fd87834f23 100644 --- a/drivers/net/wireless/realtek/rtw88/fw.c +++ b/drivers/net/wireless/realtek/rtw88/fw.c @@ -1482,7 +1482,7 @@ static bool rtw_fw_dump_check_size(struct rtw_dev *rtwdev, int rtw_fw_dump_fifo(struct rtw_dev *rtwdev, u8 fifo_sel, u32 addr, u32 size, u32 *buffer) { - if (!rtwdev->chip->fw_fifo_addr) { + if (!rtwdev->chip->fw_fifo_addr[0]) { rtw_dbg(rtwdev, RTW_DBG_FW, "chip not support dump fw fifo\n"); return -ENOTSUPP; } -- cgit v1.2.3 From 04516706bb99889986ddfa3a769ed50d2dc7ac13 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Oct 2020 16:51:03 +0300 Subject: iwlwifi: pcie: limit memory read spin time When we read device memory, we lock a spinlock, write the address we want to read from the device and then spin in a loop reading the data in 32-bit quantities from another register. As the description makes clear, this is rather inefficient, incurring a PCIe bus transaction for every read. In a typical device today, we want to read 786k SMEM if it crashes, leading to 192k register reads. Occasionally, we've seen the whole loop take over 20 seconds and then triggering the soft lockup detector. Clearly, it is unreasonable to spin here for such extended periods of time. To fix this, break the loop down into an outer and an inner loop, and break out of the inner loop if more than half a second elapsed. To avoid too much overhead, check for that only every 128 reads, though there's no particular reason for that number. Then, unlock and relock to obtain NIC access again, reprogram the start address and continue. This will keep (interrupt) latencies on the CPU down to a reasonable time. 
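For illustration, a minimal self-contained userspace sketch of the same pattern (chunked reads with a deadline that is re-checked only every 128 iterations); read_word(), the lock comments and the buffer size are placeholders, not the driver's API:

  /*
   * Illustrative sketch of the chunked-read pattern described above: do the
   * bulk of the reads back to back, re-check an elapsed-time deadline only
   * every 128 iterations, and drop/re-take the (hypothetical) lock between
   * chunks.  read_word() stands in for the per-dword device register read.
   */
  #include <stdint.h>
  #include <stdio.h>
  #include <time.h>

  static int64_t now_us(void)
  {
  	struct timespec ts;

  	clock_gettime(CLOCK_MONOTONIC, &ts);
  	return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
  }

  static uint32_t read_word(uint32_t addr)
  {
  	return addr ^ 0xdeadbeef;	/* fake data instead of a device register */
  }

  static int read_mem(uint32_t addr, uint32_t *buf, int dwords)
  {
  	int offs = 0;

  	while (offs < dwords) {
  		/* limit the time spent inside one locked section to ~500ms */
  		int64_t deadline = now_us() + 500 * 1000;

  		/* grab_lock() and reprogramming the start address would go here */
  		while (offs < dwords) {
  			buf[offs] = read_word(addr + 4 * offs);
  			offs++;

  			/* reading the clock is expensive, do it once per 128 reads */
  			if (offs % 128 == 0 && now_us() > deadline)
  				break;
  		}
  		/* release_lock() would go here */
  	}
  	return 0;
  }

  int main(void)
  {
  	static uint32_t buf[196608];	/* ~192k dwords, as in the SMEM example above */

  	read_mem(0x800000, buf, 196608);
  	printf("0x%08x\n", buf[0]);
  	return 0;
  }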
Signed-off-by: Johannes Berg Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201022165103.45878a7e49aa.I3b9b9c5a10002915072312ce75b68ed5b3dc6e14@changeid --- drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 36 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index d2e69ad53b27..2fffbbc8462f 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -2156,18 +2156,36 @@ static int iwl_trans_pcie_read_mem(struct iwl_trans *trans, u32 addr, void *buf, int dwords) { unsigned long flags; - int offs, ret = 0; + int offs = 0; u32 *vals = buf; - if (iwl_trans_grab_nic_access(trans, &flags)) { - iwl_write32(trans, HBUS_TARG_MEM_RADDR, addr); - for (offs = 0; offs < dwords; offs++) - vals[offs] = iwl_read32(trans, HBUS_TARG_MEM_RDAT); - iwl_trans_release_nic_access(trans, &flags); - } else { - ret = -EBUSY; + while (offs < dwords) { + /* limit the time we spin here under lock to 1/2s */ + ktime_t timeout = ktime_add_us(ktime_get(), 500 * USEC_PER_MSEC); + + if (iwl_trans_grab_nic_access(trans, &flags)) { + iwl_write32(trans, HBUS_TARG_MEM_RADDR, + addr + 4 * offs); + + while (offs < dwords) { + vals[offs] = iwl_read32(trans, + HBUS_TARG_MEM_RDAT); + offs++; + + /* calling ktime_get is expensive so + * do it once in 128 reads + */ + if (offs % 128 == 0 && ktime_after(ktime_get(), + timeout)) + break; + } + iwl_trans_release_nic_access(trans, &flags); + } else { + return -EBUSY; + } } - return ret; + + return 0; } static int iwl_trans_pcie_write_mem(struct iwl_trans *trans, u32 addr, -- cgit v1.2.3 From 397a973b9978533418892c6453853c52b2ad8ec6 Mon Sep 17 00:00:00 2001 From: Yan-Hsuan Chuang Date: Mon, 12 Oct 2020 16:11:40 +0800 Subject: MAINTAINERS: update Yan-Hsuan's email address Switch my email to gmail.com address. 
Signed-off-by: Yan-Hsuan Chuang Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201012081140.18085-1-tony0620emma@gmail.com --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e73636b75f29..bd2d28cfdb4e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14831,7 +14831,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-testing.g F: drivers/net/wireless/realtek/rtlwifi/ REALTEK WIRELESS DRIVER (rtw88) -M: Yan-Hsuan Chuang +M: Yan-Hsuan Chuang L: linux-wireless@vger.kernel.org S: Maintained F: drivers/net/wireless/realtek/rtw88/ -- cgit v1.2.3 From d85b4b2bf2d4229847d76cfd81e48d5beb72f75b Mon Sep 17 00:00:00 2001 From: Chi-Hsien Lin Date: Fri, 30 Oct 2020 03:56:10 -0500 Subject: MAINTAINERS: update maintainers list for Cypress - Update maintainers' email with Infineon hosted email - Add Chung-hsien Hsu as a maintainer Signed-off-by: Chi-Hsien Lin Signed-off-by: Wright Feng Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/20201030085610.145679-1-chi-hsien.lin@cypress.com --- MAINTAINERS | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index bd2d28cfdb4e..900ff33789eb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3541,11 +3541,12 @@ BROADCOM BRCM80211 IEEE802.11n WIRELESS DRIVER M: Arend van Spriel M: Franky Lin M: Hante Meuleman -M: Chi-Hsien Lin -M: Wright Feng +M: Chi-hsien Lin +M: Wright Feng +M: Chung-hsien Hsu L: linux-wireless@vger.kernel.org L: brcm80211-dev-list.pdl@broadcom.com -L: brcm80211-dev-list@cypress.com +L: SHA-cyfmac-dev-list@infineon.com S: Supported F: drivers/net/wireless/broadcom/brcm80211/ -- cgit v1.2.3 From c8a2e7a29702fe4626b7aa81149b7b7164e20606 Mon Sep 17 00:00:00 2001 From: Mordechay Goodstein Date: Sat, 7 Nov 2020 10:50:06 +0200 Subject: iwlwifi: sta: set max HE max A-MPDU according to HE capa Currently, our max tpt is limited to max HT A-MPDU for LB, and max VHT A-MPDU for HB. Configure HE exponent value correctly to achieve HE max A-MPDU, both on LB and HB. Signed-off-by: Mordechay Goodstein Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201107104557.4486852ebb56.I9eb0d028e31f183597fb90120e7d4ca87e0dd6cb@changeid --- drivers/net/wireless/intel/iwlwifi/fw/api/sta.h | 10 +++++----- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h index d43e0d3f3a12..052413eef059 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h @@ -5,10 +5,9 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2012-2014, 2018 - 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -28,10 +27,9 @@ * * BSD LICENSE * - * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2012-2014, 2018 - 2020 Intel Corporation * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -128,7 +126,9 @@ enum iwl_sta_flags { STA_FLG_MAX_AGG_SIZE_256K = (5 << STA_FLG_MAX_AGG_SIZE_SHIFT), STA_FLG_MAX_AGG_SIZE_512K = (6 << STA_FLG_MAX_AGG_SIZE_SHIFT), STA_FLG_MAX_AGG_SIZE_1024K = (7 << STA_FLG_MAX_AGG_SIZE_SHIFT), - STA_FLG_MAX_AGG_SIZE_MSK = (7 << STA_FLG_MAX_AGG_SIZE_SHIFT), + STA_FLG_MAX_AGG_SIZE_2M = (8 << STA_FLG_MAX_AGG_SIZE_SHIFT), + STA_FLG_MAX_AGG_SIZE_4M = (9 << STA_FLG_MAX_AGG_SIZE_SHIFT), + STA_FLG_MAX_AGG_SIZE_MSK = (0xf << STA_FLG_MAX_AGG_SIZE_SHIFT), STA_FLG_AGG_MPDU_DENS_SHIFT = 23, STA_FLG_AGG_MPDU_DENS_2US = (4 << STA_FLG_AGG_MPDU_DENS_SHIFT), diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 017537944fd0..799d8219463c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -196,6 +196,7 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta, mpdu_dens = sta->ht_cap.ampdu_density; } + if (sta->vht_cap.vht_supported) { agg_size = sta->vht_cap.cap & IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK; @@ -205,6 +206,23 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta, agg_size = sta->ht_cap.ampdu_factor; } + /* D6.0 10.12.2 A-MPDU length limit rules + * A STA indicates the maximum length of the A-MPDU preEOF padding + * that it can receive in an HE PPDU in the Maximum A-MPDU Length + * Exponent field in its HT Capabilities, VHT Capabilities, + * and HE 6 GHz Band Capabilities elements (if present) and the + * Maximum AMPDU Length Exponent Extension field in its HE + * Capabilities element + */ + if (sta->he_cap.has_he) + agg_size += u8_get_bits(sta->he_cap.he_cap_elem.mac_cap_info[3], + IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK); + + /* Limit to max A-MPDU supported by FW */ + if (agg_size > (STA_FLG_MAX_AGG_SIZE_4M >> STA_FLG_MAX_AGG_SIZE_SHIFT)) + agg_size = (STA_FLG_MAX_AGG_SIZE_4M >> + STA_FLG_MAX_AGG_SIZE_SHIFT); + add_sta_cmd.station_flags |= cpu_to_le32(agg_size << STA_FLG_MAX_AGG_SIZE_SHIFT); add_sta_cmd.station_flags |= -- cgit v1.2.3 From fb8d1b6e97980057b7ebed444b8950e57f268a67 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 7 Nov 2020 10:50:07 +0200 Subject: iwlwifi: mvm: use the HOT_SPOT_CMD to cancel an AUX ROC The ROC that runs on the AUX ROC (meaning an ROC on the STA vif), was added with the HOT_SPOT_CMD firmware command and must be cancelled with that same command. 
Signed-off-by: Emmanuel Grumbach Fixes: fe959c7b2049 ("iwlwifi: mvm: use the new session protection command") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201107104557.a317376154da.I44fa3637373ba4bd421cdff2cabc761bffc0735f@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/time-event.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index 7fce79c1c114..a9ebe69a6670 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -988,10 +988,13 @@ void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif) IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD)) { mvmvif = iwl_mvm_vif_from_mac80211(vif); - iwl_mvm_cancel_session_protection(mvm, mvmvif); - - if (vif->type == NL80211_IFTYPE_P2P_DEVICE) + if (vif->type == NL80211_IFTYPE_P2P_DEVICE) { + iwl_mvm_cancel_session_protection(mvm, mvmvif); set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status); + } else { + iwl_mvm_remove_aux_roc_te(mvm, mvmvif, + &mvmvif->time_event_data); + } iwl_mvm_roc_finished(mvm); -- cgit v1.2.3 From 1cf260e3a75b87726ec609ad1b6b88f515749786 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sat, 7 Nov 2020 10:50:08 +0200 Subject: iwlwifi: mvm: properly cancel a session protection for P2P We need to feed the configuration id to remove session protection properly. Remember the conf_id when we add the session protection so that we can give it back when we want to remove the session protection. While at it, slightly improve the kernel doc for the conf_id of the notification. Signed-off-by: Emmanuel Grumbach Fixes: fe959c7b2049 ("iwlwifi: mvm: use the new session protection command") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201107104557.3642f730333d.I01a98ecde62096d00d171cf34ad775bf80cb0277@changeid --- .../net/wireless/intel/iwlwifi/fw/api/time-event.h | 8 +- .../net/wireless/intel/iwlwifi/mvm/time-event.c | 94 +++++++++++++++------- 2 files changed, 68 insertions(+), 34 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h index a731f28e101a..53b438d709db 100644 --- a/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h +++ b/drivers/net/wireless/intel/iwlwifi/fw/api/time-event.h @@ -8,7 +8,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -31,7 +31,7 @@ * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH * Copyright(c) 2016 - 2017 Intel Deutschland GmbH - * Copyright(c) 2018 - 2019 Intel Corporation + * Copyright(c) 2018 - 2020 Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -421,12 +421,14 @@ struct iwl_hs20_roc_res { * able to run the GO Negotiation. Will not be fragmented and not * repetitive. Valid only on the P2P Device MAC. Only the duration will * be taken into account. 
+ * @SESSION_PROTECT_CONF_MAX_ID: not used */ enum iwl_mvm_session_prot_conf_id { SESSION_PROTECT_CONF_ASSOC, SESSION_PROTECT_CONF_GO_CLIENT_ASSOC, SESSION_PROTECT_CONF_P2P_DEVICE_DISCOV, SESSION_PROTECT_CONF_P2P_GO_NEGOTIATION, + SESSION_PROTECT_CONF_MAX_ID, }; /* SESSION_PROTECTION_CONF_ID_E_VER_1 */ /** @@ -459,7 +461,7 @@ struct iwl_mvm_session_prot_cmd { * @mac_id: the mac id for which the session protection started / ended * @status: 1 means success, 0 means failure * @start: 1 means the session protection started, 0 means it ended - * @conf_id: the configuration id of the session that started / eneded + * @conf_id: see &enum iwl_mvm_session_prot_conf_id * * Note that any session protection will always get two notifications: start * and end even the firmware could not schedule it. diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c index a9ebe69a6670..1db6d8d38822 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/time-event.c @@ -641,11 +641,32 @@ void iwl_mvm_protect_session(struct iwl_mvm *mvm, } } +static void iwl_mvm_cancel_session_protection(struct iwl_mvm *mvm, + struct iwl_mvm_vif *mvmvif) +{ + struct iwl_mvm_session_prot_cmd cmd = { + .id_and_color = + cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id, + mvmvif->color)), + .action = cpu_to_le32(FW_CTXT_ACTION_REMOVE), + .conf_id = cpu_to_le32(mvmvif->time_event_data.id), + }; + int ret; + + ret = iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(SESSION_PROTECTION_CMD, + MAC_CONF_GROUP, 0), + 0, sizeof(cmd), &cmd); + if (ret) + IWL_ERR(mvm, + "Couldn't send the SESSION_PROTECTION_CMD: %d\n", ret); +} + static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm, struct iwl_mvm_time_event_data *te_data, u32 *uid) { u32 id; + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(te_data->vif); /* * It is possible that by the time we got to this point the time @@ -663,14 +684,29 @@ static bool __iwl_mvm_remove_time_event(struct iwl_mvm *mvm, iwl_mvm_te_clear_data(mvm, te_data); spin_unlock_bh(&mvm->time_event_lock); - /* - * It is possible that by the time we try to remove it, the time event - * has already ended and removed. In such a case there is no need to - * send a removal command. + /* When session protection is supported, the te_data->id field + * is reused to save session protection's configuration. */ - if (id == TE_MAX) { - IWL_DEBUG_TE(mvm, "TE 0x%x has already ended\n", *uid); + if (fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD)) { + if (mvmvif && id < SESSION_PROTECT_CONF_MAX_ID) { + /* Session protection is still ongoing. Cancel it */ + iwl_mvm_cancel_session_protection(mvm, mvmvif); + if (te_data->vif->type == NL80211_IFTYPE_P2P_DEVICE) { + set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status); + iwl_mvm_roc_finished(mvm); + } + } return false; + } else { + /* It is possible that by the time we try to remove it, the + * time event has already ended and removed. In such a case + * there is no need to send a removal command. 
+ */ + if (id == TE_MAX) { + IWL_DEBUG_TE(mvm, "TE 0x%x has already ended\n", *uid); + return false; + } } return true; @@ -771,6 +807,7 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm, struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_mvm_session_prot_notif *notif = (void *)pkt->data; struct ieee80211_vif *vif; + struct iwl_mvm_vif *mvmvif; rcu_read_lock(); vif = iwl_mvm_rcu_dereference_vif_id(mvm, le32_to_cpu(notif->mac_id), @@ -779,9 +816,10 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm, if (!vif) goto out_unlock; + mvmvif = iwl_mvm_vif_from_mac80211(vif); + /* The vif is not a P2P_DEVICE, maintain its time_event_data */ if (vif->type != NL80211_IFTYPE_P2P_DEVICE) { - struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct iwl_mvm_time_event_data *te_data = &mvmvif->time_event_data; @@ -816,10 +854,14 @@ void iwl_mvm_rx_session_protect_notif(struct iwl_mvm *mvm, if (!le32_to_cpu(notif->status) || !le32_to_cpu(notif->start)) { /* End TE, notify mac80211 */ + mvmvif->time_event_data.id = SESSION_PROTECT_CONF_MAX_ID; ieee80211_remain_on_channel_expired(mvm->hw); set_bit(IWL_MVM_STATUS_NEED_FLUSH_P2P, &mvm->status); iwl_mvm_roc_finished(mvm); } else if (le32_to_cpu(notif->start)) { + if (WARN_ON(mvmvif->time_event_data.id != + le32_to_cpu(notif->conf_id))) + goto out_unlock; set_bit(IWL_MVM_STATUS_ROC_RUNNING, &mvm->status); ieee80211_ready_on_channel(mvm->hw); /* Start TE */ } @@ -845,20 +887,24 @@ iwl_mvm_start_p2p_roc_session_protection(struct iwl_mvm *mvm, lockdep_assert_held(&mvm->mutex); + /* The time_event_data.id field is reused to save session + * protection's configuration. + */ switch (type) { case IEEE80211_ROC_TYPE_NORMAL: - cmd.conf_id = - cpu_to_le32(SESSION_PROTECT_CONF_P2P_DEVICE_DISCOV); + mvmvif->time_event_data.id = + SESSION_PROTECT_CONF_P2P_DEVICE_DISCOV; break; case IEEE80211_ROC_TYPE_MGMT_TX: - cmd.conf_id = - cpu_to_le32(SESSION_PROTECT_CONF_P2P_GO_NEGOTIATION); + mvmvif->time_event_data.id = + SESSION_PROTECT_CONF_P2P_GO_NEGOTIATION; break; default: WARN_ONCE(1, "Got an invalid ROC type\n"); return -EINVAL; } + cmd.conf_id = cpu_to_le32(mvmvif->time_event_data.id); return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(SESSION_PROTECTION_CMD, MAC_CONF_GROUP, 0), 0, sizeof(cmd), &cmd); @@ -960,25 +1006,6 @@ void iwl_mvm_cleanup_roc_te(struct iwl_mvm *mvm) __iwl_mvm_remove_time_event(mvm, te_data, &uid); } -static void iwl_mvm_cancel_session_protection(struct iwl_mvm *mvm, - struct iwl_mvm_vif *mvmvif) -{ - struct iwl_mvm_session_prot_cmd cmd = { - .id_and_color = - cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id, - mvmvif->color)), - .action = cpu_to_le32(FW_CTXT_ACTION_REMOVE), - }; - int ret; - - ret = iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(SESSION_PROTECTION_CMD, - MAC_CONF_GROUP, 0), - 0, sizeof(cmd), &cmd); - if (ret) - IWL_ERR(mvm, - "Couldn't send the SESSION_PROTECTION_CMD: %d\n", ret); -} - void iwl_mvm_stop_roc(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { struct iwl_mvm_vif *mvmvif; @@ -1129,10 +1156,15 @@ void iwl_mvm_schedule_session_protection(struct iwl_mvm *mvm, cpu_to_le32(FW_CMD_ID_AND_COLOR(mvmvif->id, mvmvif->color)), .action = cpu_to_le32(FW_CTXT_ACTION_ADD), - .conf_id = cpu_to_le32(SESSION_PROTECT_CONF_ASSOC), .duration_tu = cpu_to_le32(MSEC_TO_TU(duration)), }; + /* The time_event_data.id field is reused to save session + * protection's configuration. 
+ */ + mvmvif->time_event_data.id = SESSION_PROTECT_CONF_ASSOC; + cmd.conf_id = cpu_to_le32(mvmvif->time_event_data.id); + lockdep_assert_held(&mvm->mutex); spin_lock_bh(&mvm->time_event_lock); -- cgit v1.2.3 From 97cc16943f23078535fdbce4f6391b948b4ccc08 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Sat, 7 Nov 2020 10:50:09 +0200 Subject: iwlwifi: mvm: write queue_sync_state only for sync We use mvm->queue_sync_state to wait for synchronous queue sync messages, but if an async one happens inbetween we shouldn't clear mvm->queue_sync_state after sending the async one, that can run concurrently (at least from the CPU POV) with another synchronous queue sync. Signed-off-by: Johannes Berg Fixes: 3c514bf831ac ("iwlwifi: mvm: add a loose synchronization of the NSSN across Rx queues") Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201107104557.51a3148f2c14.I0772171dbaec87433a11513e9586d98b5d920b5f@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 688c1125e67b..62e884f3e29e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3127,6 +3127,9 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, goto out_unlock; } + if (vif->type == NL80211_IFTYPE_STATION) + vif->bss_conf.he_support = sta->he_cap.has_he; + if (sta->tdls && (vif->p2p || iwl_mvm_tdls_sta_count(mvm, NULL) == -- cgit v1.2.3 From edb625208d84aef179e3f16590c1c582fc5fdae6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 7 Nov 2020 10:50:10 +0200 Subject: iwlwifi: pcie: set LTR to avoid completion timeout On some platforms, the preset values aren't correct and then we may get a completion timeout in the firmware. Change the LTR configuration to avoid that. The firmware will do some more complex reinit of this later, but for the boot process we use ~250usec. 
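As a rough, self-contained illustration of how the ~250usec boot value is packed into the new LTR register word (encode_bits() here is a simplified stand-in for the kernel's u32_encode_bits(); the masks are copied from the hunk below):

  #include <stdint.h>
  #include <stdio.h>

  #define LTR_NO_SNOOP_REQ	0x80000000u
  #define LTR_NO_SNOOP_SCALE	0x1c000000u
  #define LTR_NO_SNOOP_VAL	0x03ff0000u
  #define LTR_SNOOP_REQ		0x00008000u
  #define LTR_SNOOP_SCALE	0x00001c00u
  #define LTR_SNOOP_VAL		0x000003ffu
  #define LTR_SCALE_USEC	2u	/* scale code for microseconds */

  /* shift a value into the bits selected by a contiguous mask (FIELD_PREP-style) */
  static uint32_t encode_bits(uint32_t val, uint32_t mask)
  {
  	return (val << __builtin_ctz(mask)) & mask;
  }

  int main(void)
  {
  	uint32_t ltr = LTR_NO_SNOOP_REQ |
  		       encode_bits(LTR_SCALE_USEC, LTR_NO_SNOOP_SCALE) |
  		       encode_bits(250, LTR_NO_SNOOP_VAL) |
  		       LTR_SNOOP_REQ |
  		       encode_bits(LTR_SCALE_USEC, LTR_SNOOP_SCALE) |
  		       encode_bits(250, LTR_SNOOP_VAL);

  	/* 250us for both the snoop and no-snoop latency tolerance fields */
  	printf("LTR boot value: 0x%08x\n", ltr);
  	return 0;
  }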
Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201107104557.d83d591c05ba.I42885c9fb500bc08b9a4c07c4ff3d436cc7a3c84@changeid --- drivers/net/wireless/intel/iwlwifi/iwl-csr.h | 10 ++++++++++ .../net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index cb9e8e189a1a..1d48c7d7fffd 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -147,6 +147,16 @@ #define CSR_MAC_SHADOW_REG_CTL2 (CSR_BASE + 0x0AC) #define CSR_MAC_SHADOW_REG_CTL2_RX_WAKE 0xFFFF +/* LTR control (since IWL_DEVICE_FAMILY_22000) */ +#define CSR_LTR_LONG_VAL_AD (CSR_BASE + 0x0D4) +#define CSR_LTR_LONG_VAL_AD_NO_SNOOP_REQ 0x80000000 +#define CSR_LTR_LONG_VAL_AD_NO_SNOOP_SCALE 0x1c000000 +#define CSR_LTR_LONG_VAL_AD_NO_SNOOP_VAL 0x03ff0000 +#define CSR_LTR_LONG_VAL_AD_SNOOP_REQ 0x00008000 +#define CSR_LTR_LONG_VAL_AD_SNOOP_SCALE 0x00001c00 +#define CSR_LTR_LONG_VAL_AD_SNOOP_VAL 0x000003ff +#define CSR_LTR_LONG_VAL_AD_SCALE_USEC 2 + /* GIO Chicken Bits (PCI Express bus link power management) */ #define CSR_GIO_CHICKEN_BITS (CSR_BASE+0x100) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c index a0352fa873d9..5512e3c630c3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c @@ -252,6 +252,26 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans, iwl_set_bit(trans, CSR_CTXT_INFO_BOOT_CTRL, CSR_AUTO_FUNC_BOOT_ENA); + + if (trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_AX210) { + /* + * The firmware initializes this again later (to a smaller + * value), but for the boot process initialize the LTR to + * ~250 usec. + */ + u32 val = CSR_LTR_LONG_VAL_AD_NO_SNOOP_REQ | + u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC, + CSR_LTR_LONG_VAL_AD_NO_SNOOP_SCALE) | + u32_encode_bits(250, + CSR_LTR_LONG_VAL_AD_NO_SNOOP_VAL) | + CSR_LTR_LONG_VAL_AD_SNOOP_REQ | + u32_encode_bits(CSR_LTR_LONG_VAL_AD_SCALE_USEC, + CSR_LTR_LONG_VAL_AD_SNOOP_SCALE) | + u32_encode_bits(250, CSR_LTR_LONG_VAL_AD_SNOOP_VAL); + + iwl_write32(trans, CSR_LTR_LONG_VAL_AD, val); + } + if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) iwl_write_umac_prph(trans, UREG_CPU_INIT_RUN, 1); else -- cgit v1.2.3 From fe56d05ee6c87f6a1a8c7267affd92c9438249cc Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Sat, 7 Nov 2020 10:50:11 +0200 Subject: iwlwifi: mvm: fix kernel panic in case of assert during CSA During CSA, we briefly nullify the phy context, in __iwl_mvm_unassign_vif_chanctx. In case we have a FW assert right after it, it remains NULL though. We end up running into endless loop due to mac80211 trying repeatedly to move us to ASSOC state, and we keep returning -EINVAL. Later down the road we hit a kernel panic. Detect and avoid this endless loop. 
Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Kalle Valo Link: https://lore.kernel.org/r/iwlwifi.20201107104557.d64de2c17bff.Iedd0d2afa20a2aacba5259a5cae31cb3a119a4eb@changeid --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 62e884f3e29e..b627e7da7ac9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3080,7 +3080,7 @@ static int iwl_mvm_mac_sta_state(struct ieee80211_hw *hw, /* this would be a mac80211 bug ... but don't crash */ if (WARN_ON_ONCE(!mvmvif->phy_ctxt)) - return -EINVAL; + return test_bit(IWL_MVM_STATUS_HW_RESTART_REQUESTED, &mvm->status) ? 0 : -EINVAL; /* * If we are in a STA removal flow and in DQA mode: -- cgit v1.2.3 From 14a2e551faea53d45bc11629a9dac88f88950ca7 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sun, 15 Nov 2020 10:30:04 +0000 Subject: batman-adv: set .owner to THIS_MODULE If THIS_MODULE is not set, the module would be removed while debugfs is being used. It eventually makes kernel panic. Fixes: c6c8fea29769 ("net: Add batman-adv meshing protocol") Signed-off-by: Taehee Yoo Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/log.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index a67b2b091447..c0ca5fbe5b08 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -180,6 +180,7 @@ static const struct file_operations batadv_log_fops = { .read = batadv_log_read, .poll = batadv_log_poll, .llseek = no_llseek, + .owner = THIS_MODULE, }; /** -- cgit v1.2.3 From b5f796b62c98cd8c219c4b788ecb6e1218e648cb Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Wed, 18 Nov 2020 20:17:31 +0800 Subject: bnxt_en: fix error return code in bnxt_init_one() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: c213eae8d3cd ("bnxt_en: Improve VF/PF link change logic.") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Reviewed-by: Edwin Peer Link: https://lore.kernel.org/r/1605701851-20270-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 7c21aaa8b9af..092775e396c7 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -12674,6 +12674,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) create_singlethread_workqueue("bnxt_pf_wq"); if (!bnxt_pf_wq) { dev_err(&pdev->dev, "Unable to create workqueue.\n"); + rc = -ENOMEM; goto init_err_pci_clean; } } -- cgit v1.2.3 From 3383176efc0fb0c0900a191026468a58668b4214 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Thu, 19 Nov 2020 21:30:21 +0800 Subject: bnxt_en: fix error return code in bnxt_init_board() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. 
Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Reviewed-by: Edwin Peer Link: https://lore.kernel.org/r/1605792621-6268-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 092775e396c7..6e8329534cde 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11590,6 +11590,7 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)) != 0 && dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)) != 0) { dev_err(&pdev->dev, "System does not support DMA, aborting\n"); + rc = -EIO; goto init_err_disable; } -- cgit v1.2.3 From f46e79aa1a2bea7de2885fa8d79a68d11545a5fd Mon Sep 17 00:00:00 2001 From: Martin Habets Date: Fri, 20 Nov 2020 11:32:07 +0000 Subject: MAINTAINERS: Change Solarflare maintainers Email from solarflare.com will stop working. Update the maintainers. A replacement for linux-net-drivers@solarflare.com is not working yet, for now remove it. Signed-off-by: Martin Habets Signed-off-by: Edward Cree Link: https://lore.kernel.org/r/20201120113207.GA1605547@mh-desktop Signed-off-by: Jakub Kicinski --- MAINTAINERS | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index c7f6924d34c7..aaf3b023e044 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15775,9 +15775,8 @@ F: drivers/slimbus/ F: include/linux/slimbus.h SFC NETWORK DRIVER -M: Solarflare linux maintainers -M: Edward Cree -M: Martin Habets +M: Edward Cree +M: Martin Habets L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/sfc/ -- cgit v1.2.3 From 3b3fd068c56e3fbea30090859216a368398e39bf Mon Sep 17 00:00:00 2001 From: Anmol Karn Date: Fri, 20 Nov 2020 00:40:43 +0530 Subject: rose: Fix Null pointer dereference in rose_send_frame() rose_send_frame() dereferences `neigh->dev` when called from rose_transmit_clear_request(), and the first occurrence of the `neigh` is in rose_loopback_timer() as `rose_loopback_neigh`, and it is initialized in rose_add_loopback_neigh() as NULL. i.e when `rose_loopback_neigh` used in rose_loopback_timer() its `->dev` was still NULL and rose_loopback_timer() was calling rose_rx_call_request() without checking for NULL. - net/rose/rose_link.c This bug seems to get triggered in this line: rose_call = (ax25_address *)neigh->dev->dev_addr; Fix it by adding NULL checking for `rose_loopback_neigh->dev` in rose_loopback_timer(). 
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Suggested-by: Jakub Kicinski Reported-by: syzbot+a1c743815982d9496393@syzkaller.appspotmail.com Tested-by: syzbot+a1c743815982d9496393@syzkaller.appspotmail.com Link: https://syzkaller.appspot.com/bug?id=9d2a7ca8c7f2e4b682c97578dfa3f236258300b3 Signed-off-by: Anmol Karn Link: https://lore.kernel.org/r/20201119191043.28813-1-anmol.karan123@gmail.com Signed-off-by: Jakub Kicinski --- net/rose/rose_loopback.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 7b094275ea8b..11c45c8c6c16 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -96,10 +96,19 @@ static void rose_loopback_timer(struct timer_list *unused) } if (frametype == ROSE_CALL_REQUEST) { - if ((dev = rose_dev_get(dest)) != NULL) { - if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) - kfree_skb(skb); - } else { + if (!rose_loopback_neigh->dev) { + kfree_skb(skb); + continue; + } + + dev = rose_dev_get(dest); + if (!dev) { + kfree_skb(skb); + continue; + } + + if (rose_rx_call_request(skb, dev, rose_loopback_neigh, lci_o) == 0) { + dev_put(dev); kfree_skb(skb); } } else { -- cgit v1.2.3 From c54bc3ced5106663c2f2b44071800621f505b00e Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Fri, 20 Nov 2020 02:44:31 -0500 Subject: bnxt_en: Release PCI regions when DMA mask setup fails during probe. Jump to init_err_release to cleanup. bnxt_unmap_bars() will also be called but it will do nothing if the BARs are not mapped yet. Fixes: c0c050c58d84 ("bnxt_en: New Broadcom ethernet driver.") Reported-by: Jakub Kicinski Signed-off-by: Michael Chan Link: https://lore.kernel.org/r/1605858271-8209-1-git-send-email-michael.chan@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 6e8329534cde..0af0af2b70fe 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -11591,7 +11591,7 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev) dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)) != 0) { dev_err(&pdev->dev, "System does not support DMA, aborting\n"); rc = -EIO; - goto init_err_disable; + goto init_err_release; } pci_set_master(pdev); -- cgit v1.2.3 From 20ffc7adf53a5fd3d19751fbff7895bcca66686e Mon Sep 17 00:00:00 2001 From: Vadim Fedorenko Date: Thu, 19 Nov 2020 18:59:48 +0300 Subject: net/tls: missing received data after fast remote close In case when tcp socket received FIN after some data and the parser haven't started before reading data caller will receive an empty buffer. This behavior differs from plain TCP socket and leads to special treating in user-space. The flow that triggers the race is simple. Server sends small amount of data right after the connection is configured to use TLS and closes the connection. In this case receiver sees TLS Handshake data, configures TLS socket right after Change Cipher Spec record. While the configuration is in process, TCP socket receives small Application Data record, Encrypted Alert record and FIN packet. So the TCP socket changes sk_shutdown to RCV_SHUTDOWN and sk_flag with SK_DONE bit set. The received data is not parsed upon arrival and is never sent to user-space. Patch unpauses parser directly if we have unparsed data in tcp receive queue. 
Fixes: fcf4793e278e ("tls: check RCV_SHUTDOWN in tls_wait_data") Signed-off-by: Vadim Fedorenko Link: https://lore.kernel.org/r/1605801588-12236-1-git-send-email-vfedorenko@novek.ru Signed-off-by: Jakub Kicinski --- net/tls/tls_sw.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 2fe9e2cf8659..845c628ac1b2 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1295,6 +1295,12 @@ static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock, return NULL; } + if (!skb_queue_empty(&sk->sk_receive_queue)) { + __strp_unpause(&ctx->strp); + if (ctx->recv_pkt) + return ctx->recv_pkt; + } + if (sk->sk_shutdown & RCV_SHUTDOWN) return NULL; -- cgit v1.2.3 From bff453921ae105a8dbbad0ed7dd5f5ce424536e7 Mon Sep 17 00:00:00 2001 From: Raju Rangoju Date: Wed, 18 Nov 2020 20:02:13 +0530 Subject: cxgb4: fix the panic caused by non smac rewrite SMT entry is allocated only when loopback Source MAC rewriting is requested. Accessing SMT entry for non smac rewrite cases results in kernel panic. Fix the panic caused by non smac rewrite Fixes: 937d84205884 ("cxgb4: set up filter action after rewrites") Signed-off-by: Raju Rangoju Link: https://lore.kernel.org/r/20201118143213.13319-1-rajur@chelsio.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 4e55f7081644..83b46440408b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -880,7 +880,8 @@ int set_filter_wr(struct adapter *adapter, int fidx) FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = f->smt->idx; + if (f->fs.newsmac) + fwr->smac_sel = f->smt->idx; fwr->rx_chan_rx_rpl_iq = htons(FW_FILTER_WR_RX_CHAN_V(0) | FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); -- cgit v1.2.3 From d2624e70a2f53b6f402fdaeabe7db798148618c5 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 19 Nov 2020 16:51:06 +0200 Subject: dpaa2-eth: select XGMAC_MDIO for MDIO bus support Explicitly enable the FSL_XGMAC_MDIO Kconfig option in order to have MDIO access to internal and external PHYs. Fixes: 719479230893 ("dpaa2-eth: add MAC/PHY support through phylink") Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20201119145106.712761-1-ciorneiioana@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig index cfd369cf4c8c..c0e05f71826d 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Kconfig +++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig @@ -4,6 +4,7 @@ config FSL_DPAA2_ETH depends on FSL_MC_BUS && FSL_MC_DPIO select PHYLINK select PCS_LYNX + select FSL_XGMAC_MDIO help This is the DPAA2 Ethernet driver supporting Freescale SoCs with DPAA2 (DataPath Acceleration Architecture v2). -- cgit v1.2.3 From 861602b57730a5c6d3e0b1e4ca7133ca9a8b8538 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 19 Nov 2020 13:23:51 -0800 Subject: tcp: Allow full IP tos/IPv6 tclass to be reflected in L3 header An issue was recently found where DCTCP SYN/ACK packets did not have the ECT bit set in the L3 header. 
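(For context: the ECN codepoint lives in the two low-order bits of the IPv4 tos / IPv6 traffic class byte, so masking the whole byte also strips ECT. A tiny self-contained sketch of the masking discussed below, using an illustrative tos value:)

  #include <stdio.h>

  #define INET_ECN_MASK	3	/* low two bits of tos/tclass carry the ECN codepoint */
  #define INET_ECN_ECT_0	2	/* ECT(0), what a DCTCP SYN/ACK should carry */

  int main(void)
  {
  	unsigned char syn_tos = 0xb8 | INET_ECN_ECT_0;	/* example: DSCP EF + ECT(0) */

  	/* reflect only the DSCP part of the peer's tos; ECN is left to the socket/CA */
  	unsigned char reflected = syn_tos & ~INET_ECN_MASK;

  	printf("peer tos 0x%02x -> reflected 0x%02x\n", syn_tos, reflected);
  	return 0;
  }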
A bit of code review found that the recent change referenced below had gone through and added a mask that prevented the ECN bits from being populated in the L3 header. This patch addresses that by rolling back the mask so that it is only applied to the flags coming from the incoming TCP request instead of applying it to the socket tos/tclass field. Doing this, the ECT bits were restored in the SYN/ACK packets in my testing. One thing that is not addressed by this patch set is the fact that tcp_reflect_tos appears to be incompatible with ECN-based congestion avoidance algorithms. At a minimum the feature should likely be documented, which it currently isn't. Fixes: ac8f1710c12b ("tcp: reflect tos value received in SYN to the socket") Signed-off-by: Alexander Duyck Acked-by: Wei Wang Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 5 +++-- net/ipv6/tcp_ipv6.c | 6 +++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7352c097ae48..25d35a56ceba 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -981,7 +981,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos : inet_sk(sk)->tos; + tcp_rsk(req)->syn_tos & ~INET_ECN_MASK : + inet_sk(sk)->tos; if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); @@ -990,7 +991,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, rcu_dereference(ireq->ireq_opt), - tos & ~INET_ECN_MASK); + tos); rcu_read_unlock(); err = net_xmit_eval(err); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 8db59f4e5f13..3d49e8d0afee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -530,12 +530,12 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, rcu_read_lock(); opt = ireq->ipv6_opt; tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos : np->tclass; + tcp_rsk(req)->syn_tos & ~INET_ECN_MASK : + np->tclass; if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, - tclass & ~INET_ECN_MASK, - sk->sk_priority); + tclass, sk->sk_priority); rcu_read_unlock(); err = net_xmit_eval(err); } -- cgit v1.2.3 From 55472017a4219ca965a957584affdb17549ae4a4 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 19 Nov 2020 13:23:58 -0800 Subject: tcp: Set INET_ECN_xmit configuration in tcp_reinit_congestion_control When setting congestion control via a BPF program it is seen that the SYN/ACK for packets within a given flow will not include the ECT0 flag. A bit of simple printk debugging shows that when this is configured without BPF we will see the INET_ECN_xmit value initialized in tcp_assign_congestion_control; however, when we configure this via BPF the socket is in the closed state and as such it isn't configured, and I do not see it being initialized when we transition the socket into the listen state. The result of this is that the ECT0 bit is configured based on whatever the default state is for the socket. An easy way to reproduce this is to monitor the following with tcpdump: tools/testing/selftests/bpf/test_progs -t bpf_tcp_ca Without this patch the SYN/ACK will follow whatever the default is.
If dctcp all SYN/ACK packets will have the ECT0 bit set, and if it is not then ECT0 will be cleared on all SYN/ACK packets. With this patch applied the SYN/ACK bit matches the value seen on the other packets in the given stream. Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control") Signed-off-by: Alexander Duyck Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_cong.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index db47ac24d057..563d016e7478 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -198,6 +198,11 @@ static void tcp_reinit_congestion_control(struct sock *sk, icsk->icsk_ca_setsockopt = 1; memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + if (ca->flags & TCP_CONG_NEEDS_ECN) + INET_ECN_xmit(sk); + else + INET_ECN_dontxmit(sk); + if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) tcp_init_congestion_control(sk); } -- cgit v1.2.3 From 0d0e2b538c13f4f698ba58485a573ce824036567 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Fri, 20 Nov 2020 10:09:36 +0100 Subject: s390/qeth: Remove pnso workaround Remove workaround that supported early hardware implementations of PNSO OC3. Rely on the 'enarf' feature bit instead. Fixes: fa115adff2c1 ("s390/qeth: Detect PNSO OC3 capability") Signed-off-by: Alexandra Winter Reviewed-by: Julian Wiedmann [jwi: use logical instead of bit-wise AND] Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_l2_main.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 28f6dda95736..79939ba5d523 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -985,32 +985,19 @@ static void qeth_l2_setup_bridgeport_attrs(struct qeth_card *card) * change notification' and thus can support the learning_sync bridgeport * attribute * @card: qeth_card structure pointer - * - * This is a destructive test and must be called before dev2br or - * bridgeport address notification is enabled! */ static void qeth_l2_detect_dev2br_support(struct qeth_card *card) { struct qeth_priv *priv = netdev_priv(card->dev); bool dev2br_supported; - int rc; QETH_CARD_TEXT(card, 2, "d2brsup"); if (!IS_IQD(card)) return; /* dev2br requires valid cssid,iid,chid */ - if (!card->info.ids_valid) { - dev2br_supported = false; - } else if (css_general_characteristics.enarf) { - dev2br_supported = true; - } else { - /* Old machines don't have the feature bit: - * Probe by testing whether a disable succeeds - */ - rc = qeth_l2_pnso(card, PNSO_OC_NET_ADDR_INFO, 0, NULL, NULL); - dev2br_supported = !rc; - } + dev2br_supported = card->info.ids_valid && + css_general_characteristics.enarf; QETH_CARD_TEXT_(card, 2, "D2Bsup%02x", dev2br_supported); if (dev2br_supported) @@ -2233,7 +2220,6 @@ static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok) struct net_device *dev = card->dev; int rc = 0; - /* query before bridgeport_notification may be enabled */ qeth_l2_detect_dev2br_support(card); mutex_lock(&card->sbp_lock); -- cgit v1.2.3 From 34c7f50f7d0d36fa663c74aee39e25e912505320 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Nov 2020 10:09:37 +0100 Subject: s390/qeth: make af_iucv TX notification call more robust Calling into socket code is ugly already, at least check whether we are dealing with the expected sk_family. Only looking at skb->protocol is bound to cause troubles (consider eg. af_packet). 
Fixes: b333293058aa ("qeth: add support for af_iucv HiperSockets transport") Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 93c9b30ab17a..715f440bdc7c 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -33,6 +33,7 @@ #include #include +#include #include #include @@ -1405,7 +1406,7 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *q, skb_queue_walk(&buf->skb_list, skb) { QETH_CARD_TEXT_(q->card, 5, "skbn%d", notification); QETH_CARD_TEXT_(q->card, 5, "%lx", (long) skb); - if (skb->protocol == htons(ETH_P_AF_IUCV) && skb->sk) + if (skb->sk && skb->sk->sk_family == PF_IUCV) iucv_sk(skb->sk)->sk_txnotify(skb, notification); } } -- cgit v1.2.3 From 8908f36d20d8ba610d3a7d110b3049b5853b9bb1 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Nov 2020 10:09:38 +0100 Subject: s390/qeth: fix af_iucv notification race The two expected notification sequences are 1. TX_NOTIFY_PENDING with a subsequent TX_NOTIFY_DELAYED_*, when our TX completion code first observed the pending TX and the QAOB then completes at a later time; or 2. TX_NOTIFY_OK, when qeth_qdio_handle_aob() picked up the QAOB completion before our TX completion code even noticed that the TX was pending. But as qeth_iqd_tx_complete() and qeth_qdio_handle_aob() can run concurrently, we may end up with a race that results in a sequence of TX_NOTIFY_DELAYED_* followed by TX_NOTIFY_PENDING. Which would confuse the af_iucv code in its tracking of pending transmits. Rework the notification code, so that qeth_qdio_handle_aob() defers its notification if the TX completion code is still active. Fixes: b333293058aa ("qeth: add support for af_iucv HiperSockets transport") Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core.h | 9 +++-- drivers/s390/net/qeth_core_main.c | 73 ++++++++++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 24 deletions(-) diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h index f73b4756ed5e..b235393e091c 100644 --- a/drivers/s390/net/qeth_core.h +++ b/drivers/s390/net/qeth_core.h @@ -417,10 +417,13 @@ enum qeth_qdio_out_buffer_state { QETH_QDIO_BUF_EMPTY, /* Filled by driver; owned by hardware in order to be sent. */ QETH_QDIO_BUF_PRIMED, - /* Identified to be pending in TPQ. */ + /* Discovered by the TX completion code: */ QETH_QDIO_BUF_PENDING, - /* Found in completion queue. */ - QETH_QDIO_BUF_IN_CQ, + /* Finished by the TX completion code: */ + QETH_QDIO_BUF_NEED_QAOB, + /* Received QAOB notification on CQ: */ + QETH_QDIO_BUF_QAOB_OK, + QETH_QDIO_BUF_QAOB_ERROR, /* Handled via transfer pending / completion queue. 
*/ QETH_QDIO_BUF_HANDLED_DELAYED, }; diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 715f440bdc7c..48f9e4a027bf 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -511,6 +511,7 @@ static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx, static void qeth_qdio_handle_aob(struct qeth_card *card, unsigned long phys_aob_addr) { + enum qeth_qdio_out_buffer_state new_state = QETH_QDIO_BUF_QAOB_OK; struct qaob *aob; struct qeth_qdio_out_buffer *buffer; enum iucv_tx_notify notification; @@ -522,22 +523,6 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, buffer = (struct qeth_qdio_out_buffer *) aob->user1; QETH_CARD_TEXT_(card, 5, "%lx", aob->user1); - if (atomic_cmpxchg(&buffer->state, QETH_QDIO_BUF_PRIMED, - QETH_QDIO_BUF_IN_CQ) == QETH_QDIO_BUF_PRIMED) { - notification = TX_NOTIFY_OK; - } else { - WARN_ON_ONCE(atomic_read(&buffer->state) != - QETH_QDIO_BUF_PENDING); - atomic_set(&buffer->state, QETH_QDIO_BUF_IN_CQ); - notification = TX_NOTIFY_DELAYED_OK; - } - - if (aob->aorc != 0) { - QETH_CARD_TEXT_(card, 2, "aorc%02X", aob->aorc); - notification = qeth_compute_cq_notification(aob->aorc, 1); - } - qeth_notify_skbs(buffer->q, buffer, notification); - /* Free dangling allocations. The attached skbs are handled by * qeth_cleanup_handled_pending(). */ @@ -549,7 +534,33 @@ static void qeth_qdio_handle_aob(struct qeth_card *card, if (data && buffer->is_header[i]) kmem_cache_free(qeth_core_header_cache, data); } - atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED); + + if (aob->aorc) { + QETH_CARD_TEXT_(card, 2, "aorc%02X", aob->aorc); + new_state = QETH_QDIO_BUF_QAOB_ERROR; + } + + switch (atomic_xchg(&buffer->state, new_state)) { + case QETH_QDIO_BUF_PRIMED: + /* Faster than TX completion code. */ + notification = qeth_compute_cq_notification(aob->aorc, 0); + qeth_notify_skbs(buffer->q, buffer, notification); + atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED); + break; + case QETH_QDIO_BUF_PENDING: + /* TX completion code is active and will handle the async + * completion for us. + */ + break; + case QETH_QDIO_BUF_NEED_QAOB: + /* TX completion code is already finished. */ + notification = qeth_compute_cq_notification(aob->aorc, 1); + qeth_notify_skbs(buffer->q, buffer, notification); + atomic_set(&buffer->state, QETH_QDIO_BUF_HANDLED_DELAYED); + break; + default: + WARN_ON_ONCE(1); + } qdio_release_aob(aob); } @@ -1417,9 +1428,6 @@ static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error, struct qeth_qdio_out_q *queue = buf->q; struct sk_buff *skb; - /* release may never happen from within CQ tasklet scope */ - WARN_ON_ONCE(atomic_read(&buf->state) == QETH_QDIO_BUF_IN_CQ); - if (atomic_read(&buf->state) == QETH_QDIO_BUF_PENDING) qeth_notify_skbs(queue, buf, TX_NOTIFY_GENERALERROR); @@ -5870,9 +5878,32 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue, if (atomic_cmpxchg(&buffer->state, QETH_QDIO_BUF_PRIMED, QETH_QDIO_BUF_PENDING) == - QETH_QDIO_BUF_PRIMED) + QETH_QDIO_BUF_PRIMED) { qeth_notify_skbs(queue, buffer, TX_NOTIFY_PENDING); + /* Handle race with qeth_qdio_handle_aob(): */ + switch (atomic_xchg(&buffer->state, + QETH_QDIO_BUF_NEED_QAOB)) { + case QETH_QDIO_BUF_PENDING: + /* No concurrent QAOB notification. 
*/ + break; + case QETH_QDIO_BUF_QAOB_OK: + qeth_notify_skbs(queue, buffer, + TX_NOTIFY_DELAYED_OK); + atomic_set(&buffer->state, + QETH_QDIO_BUF_HANDLED_DELAYED); + break; + case QETH_QDIO_BUF_QAOB_ERROR: + qeth_notify_skbs(queue, buffer, + TX_NOTIFY_DELAYED_GENERALERROR); + atomic_set(&buffer->state, + QETH_QDIO_BUF_HANDLED_DELAYED); + break; + default: + WARN_ON_ONCE(1); + } + } + QETH_CARD_TEXT_(card, 5, "pel%u", bidx); /* prepare the queue slot for re-use: */ -- cgit v1.2.3 From 7ed10e16e50daf74460f54bc922e27c6863c8d61 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Nov 2020 10:09:39 +0100 Subject: s390/qeth: fix tear down of async TX buffers When qeth_iqd_tx_complete() detects that a TX buffer requires additional async completion via QAOB, it might fail to replace the queue entry's metadata (and ends up triggering recovery). Assume now that the device gets torn down, overruling the recovery. If the QAOB notification then arrives before the tear down has sufficiently progressed, the buffer state is changed to QETH_QDIO_BUF_HANDLED_DELAYED by qeth_qdio_handle_aob(). The tear down code calls qeth_drain_output_queue(), where qeth_cleanup_handled_pending() will then attempt to replace such a buffer _again_. If it succeeds this time, the buffer ends up dangling in its replacement's ->next_pending list ... where it will never be freed, since there's no further call to qeth_cleanup_handled_pending(). But the second attempt isn't actually needed, we can simply leave the buffer on the queue and re-use it after a potential recovery has completed. The qeth_clear_output_buffer() in qeth_drain_output_queue() will ensure that it's in a clean state again. Fixes: 72861ae792c2 ("qeth: recovery through asynchronous delivery") Signed-off-by: Julian Wiedmann Signed-off-by: Jakub Kicinski --- drivers/s390/net/qeth_core_main.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 48f9e4a027bf..e27319de7b00 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -500,12 +500,6 @@ static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx, } } - if (forced_cleanup && (atomic_read(&(q->bufs[bidx]->state)) == - QETH_QDIO_BUF_HANDLED_DELAYED)) { - /* for recovery situations */ - qeth_init_qdio_out_buf(q, bidx); - QETH_CARD_TEXT(q->card, 2, "clprecov"); - } } static void qeth_qdio_handle_aob(struct qeth_card *card, -- cgit v1.2.3 From b9ad3e9f5a7a760ab068e33e1f18d240ba32ce92 Mon Sep 17 00:00:00 2001 From: Jamie Iles Date: Fri, 20 Nov 2020 14:28:27 +0000 Subject: bonding: wait for sysfs kobject destruction before freeing struct slave syzkaller found that with CONFIG_DEBUG_KOBJECT_RELEASE=y, releasing a struct slave device could result in the following splat: kobject: 'bonding_slave' (00000000cecdd4fe): kobject_release, parent 0000000074ceb2b2 (delayed 1000) bond0 (unregistering): (slave bond_slave_1): Releasing backup interface ------------[ cut here ]------------ ODEBUG: free active (active state 0) object type: timer_list hint: workqueue_select_cpu_near kernel/workqueue.c:1549 [inline] ODEBUG: free active (active state 0) object type: timer_list hint: delayed_work_timer_fn+0x0/0x98 kernel/workqueue.c:1600 WARNING: CPU: 1 PID: 842 at lib/debugobjects.c:485 debug_print_object+0x180/0x240 lib/debugobjects.c:485 Kernel panic - not syncing: panic_on_warn set ... 
CPU: 1 PID: 842 Comm: kworker/u4:4 Tainted: G S 5.9.0-rc8+ #96 Hardware name: linux,dummy-virt (DT) Workqueue: netns cleanup_net Call trace: dump_backtrace+0x0/0x4d8 include/linux/bitmap.h:239 show_stack+0x34/0x48 arch/arm64/kernel/traps.c:142 __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x174/0x1f8 lib/dump_stack.c:118 panic+0x360/0x7a0 kernel/panic.c:231 __warn+0x244/0x2ec kernel/panic.c:600 report_bug+0x240/0x398 lib/bug.c:198 bug_handler+0x50/0xc0 arch/arm64/kernel/traps.c:974 call_break_hook+0x160/0x1d8 arch/arm64/kernel/debug-monitors.c:322 brk_handler+0x30/0xc0 arch/arm64/kernel/debug-monitors.c:329 do_debug_exception+0x184/0x340 arch/arm64/mm/fault.c:864 el1_dbg+0x48/0xb0 arch/arm64/kernel/entry-common.c:65 el1_sync_handler+0x170/0x1c8 arch/arm64/kernel/entry-common.c:93 el1_sync+0x80/0x100 arch/arm64/kernel/entry.S:594 debug_print_object+0x180/0x240 lib/debugobjects.c:485 __debug_check_no_obj_freed lib/debugobjects.c:967 [inline] debug_check_no_obj_freed+0x200/0x430 lib/debugobjects.c:998 slab_free_hook mm/slub.c:1536 [inline] slab_free_freelist_hook+0x190/0x210 mm/slub.c:1577 slab_free mm/slub.c:3138 [inline] kfree+0x13c/0x460 mm/slub.c:4119 bond_free_slave+0x8c/0xf8 drivers/net/bonding/bond_main.c:1492 __bond_release_one+0xe0c/0xec8 drivers/net/bonding/bond_main.c:2190 bond_slave_netdev_event drivers/net/bonding/bond_main.c:3309 [inline] bond_netdev_event+0x8f0/0xa70 drivers/net/bonding/bond_main.c:3420 notifier_call_chain+0xf0/0x200 kernel/notifier.c:83 __raw_notifier_call_chain kernel/notifier.c:361 [inline] raw_notifier_call_chain+0x44/0x58 kernel/notifier.c:368 call_netdevice_notifiers_info+0xbc/0x150 net/core/dev.c:2033 call_netdevice_notifiers_extack net/core/dev.c:2045 [inline] call_netdevice_notifiers net/core/dev.c:2059 [inline] rollback_registered_many+0x6a4/0xec0 net/core/dev.c:9347 unregister_netdevice_many.part.0+0x2c/0x1c0 net/core/dev.c:10509 unregister_netdevice_many net/core/dev.c:10508 [inline] default_device_exit_batch+0x294/0x338 net/core/dev.c:10992 ops_exit_list.isra.0+0xec/0x150 net/core/net_namespace.c:189 cleanup_net+0x44c/0x888 net/core/net_namespace.c:603 process_one_work+0x96c/0x18c0 kernel/workqueue.c:2269 worker_thread+0x3f0/0xc30 kernel/workqueue.c:2415 kthread+0x390/0x498 kernel/kthread.c:292 ret_from_fork+0x10/0x18 arch/arm64/kernel/entry.S:925 This is a potential use-after-free if the sysfs nodes are being accessed whilst removing the struct slave, so wait for the object destruction to complete before freeing the struct slave itself. 
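The pattern the fix adopts is the standard one for sysfs-backed objects: embed the kobject, free the containing structure only from the ktype's release() callback, and always drop references with kobject_put() instead of calling kfree() directly. A minimal, self-contained module sketch of that pattern (not the bonding code; names are placeholders):

  /* foo_kobj.c - minimal sketch of the "free in kobject release" pattern */
  #include <linux/init.h>
  #include <linux/kobject.h>
  #include <linux/module.h>
  #include <linux/slab.h>

  struct foo {
  	struct kobject kobj;	/* sysfs references now pin the whole object */
  	int payload;
  };

  static struct foo *foo;

  static void foo_release(struct kobject *kobj)
  {
  	/* runs only after the last kobject/sysfs reference is dropped */
  	kfree(container_of(kobj, struct foo, kobj));
  }

  static struct kobj_type foo_ktype = {
  	.release = foo_release,
  };

  static int __init foo_init(void)
  {
  	int err;

  	foo = kzalloc(sizeof(*foo), GFP_KERNEL);
  	if (!foo)
  		return -ENOMEM;

  	err = kobject_init_and_add(&foo->kobj, &foo_ktype, kernel_kobj, "foo_example");
  	if (err)
  		kobject_put(&foo->kobj);	/* never kfree(); release() frees it */
  	return err;
  }

  static void __exit foo_exit(void)
  {
  	kobject_put(&foo->kobj);	/* free happens in foo_release() */
  }

  module_init(foo_init);
  module_exit(foo_exit);
  MODULE_LICENSE("GPL");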
Fixes: 07699f9a7c8d ("bonding: add sysfs /slave dir for bond slave devices.") Fixes: a068aab42258 ("bonding: Fix reference count leak in bond_sysfs_slave_add.") Cc: Qiushi Wu Cc: Jay Vosburgh Cc: Veaceslav Falico Cc: Andy Gospodarek Signed-off-by: Jamie Iles Reviewed-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20201120142827.879226-1-jamie@nuviainc.com Signed-off-by: Jakub Kicinski --- drivers/net/bonding/bond_main.c | 61 ++++++++++++++++++++++++---------- drivers/net/bonding/bond_sysfs_slave.c | 18 +--------- include/net/bonding.h | 8 +++++ 3 files changed, 52 insertions(+), 35 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 84ecbc6fa0ff..47afc5938c26 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1460,7 +1460,39 @@ static void bond_upper_dev_unlink(struct bonding *bond, struct slave *slave) slave->dev->flags &= ~IFF_SLAVE; } -static struct slave *bond_alloc_slave(struct bonding *bond) +static void slave_kobj_release(struct kobject *kobj) +{ + struct slave *slave = to_slave(kobj); + struct bonding *bond = bond_get_bond_by_slave(slave); + + cancel_delayed_work_sync(&slave->notify_work); + if (BOND_MODE(bond) == BOND_MODE_8023AD) + kfree(SLAVE_AD_INFO(slave)); + + kfree(slave); +} + +static struct kobj_type slave_ktype = { + .release = slave_kobj_release, +#ifdef CONFIG_SYSFS + .sysfs_ops = &slave_sysfs_ops, +#endif +}; + +static int bond_kobj_init(struct slave *slave) +{ + int err; + + err = kobject_init_and_add(&slave->kobj, &slave_ktype, + &(slave->dev->dev.kobj), "bonding_slave"); + if (err) + kobject_put(&slave->kobj); + + return err; +} + +static struct slave *bond_alloc_slave(struct bonding *bond, + struct net_device *slave_dev) { struct slave *slave = NULL; @@ -1468,11 +1500,17 @@ static struct slave *bond_alloc_slave(struct bonding *bond) if (!slave) return NULL; + slave->bond = bond; + slave->dev = slave_dev; + + if (bond_kobj_init(slave)) + return NULL; + if (BOND_MODE(bond) == BOND_MODE_8023AD) { SLAVE_AD_INFO(slave) = kzalloc(sizeof(struct ad_slave_info), GFP_KERNEL); if (!SLAVE_AD_INFO(slave)) { - kfree(slave); + kobject_put(&slave->kobj); return NULL; } } @@ -1481,17 +1519,6 @@ static struct slave *bond_alloc_slave(struct bonding *bond) return slave; } -static void bond_free_slave(struct slave *slave) -{ - struct bonding *bond = bond_get_bond_by_slave(slave); - - cancel_delayed_work_sync(&slave->notify_work); - if (BOND_MODE(bond) == BOND_MODE_8023AD) - kfree(SLAVE_AD_INFO(slave)); - - kfree(slave); -} - static void bond_fill_ifbond(struct bonding *bond, struct ifbond *info) { info->bond_mode = BOND_MODE(bond); @@ -1678,14 +1705,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev, goto err_undo_flags; } - new_slave = bond_alloc_slave(bond); + new_slave = bond_alloc_slave(bond, slave_dev); if (!new_slave) { res = -ENOMEM; goto err_undo_flags; } - new_slave->bond = bond; - new_slave->dev = slave_dev; /* Set the new_slave's queue_id to be zero. Queue ID mapping * is set via sysfs or module option if desired. 
*/ @@ -2007,7 +2032,7 @@ err_restore_mtu: dev_set_mtu(slave_dev, new_slave->original_mtu); err_free: - bond_free_slave(new_slave); + kobject_put(&new_slave->kobj); err_undo_flags: /* Enslave of first slave has failed and we need to fix master's mac */ @@ -2187,7 +2212,7 @@ static int __bond_release_one(struct net_device *bond_dev, if (!netif_is_bond_master(slave_dev)) slave_dev->priv_flags &= ~IFF_BONDING; - bond_free_slave(slave); + kobject_put(&slave->kobj); return 0; } diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c index 9b8346638f69..fd07561da034 100644 --- a/drivers/net/bonding/bond_sysfs_slave.c +++ b/drivers/net/bonding/bond_sysfs_slave.c @@ -121,7 +121,6 @@ static const struct slave_attribute *slave_attrs[] = { }; #define to_slave_attr(_at) container_of(_at, struct slave_attribute, attr) -#define to_slave(obj) container_of(obj, struct slave, kobj) static ssize_t slave_show(struct kobject *kobj, struct attribute *attr, char *buf) @@ -132,28 +131,15 @@ static ssize_t slave_show(struct kobject *kobj, return slave_attr->show(slave, buf); } -static const struct sysfs_ops slave_sysfs_ops = { +const struct sysfs_ops slave_sysfs_ops = { .show = slave_show, }; -static struct kobj_type slave_ktype = { -#ifdef CONFIG_SYSFS - .sysfs_ops = &slave_sysfs_ops, -#endif -}; - int bond_sysfs_slave_add(struct slave *slave) { const struct slave_attribute **a; int err; - err = kobject_init_and_add(&slave->kobj, &slave_ktype, - &(slave->dev->dev.kobj), "bonding_slave"); - if (err) { - kobject_put(&slave->kobj); - return err; - } - for (a = slave_attrs; *a; ++a) { err = sysfs_create_file(&slave->kobj, &((*a)->attr)); if (err) { @@ -171,6 +157,4 @@ void bond_sysfs_slave_del(struct slave *slave) for (a = slave_attrs; *a; ++a) sysfs_remove_file(&slave->kobj, &((*a)->attr)); - - kobject_put(&slave->kobj); } diff --git a/include/net/bonding.h b/include/net/bonding.h index 7d132cc1e584..d9d0ff3b0ad3 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -185,6 +185,11 @@ struct slave { struct rtnl_link_stats64 slave_stats; }; +static inline struct slave *to_slave(struct kobject *kobj) +{ + return container_of(kobj, struct slave, kobj); +} + struct bond_up_slave { unsigned int count; struct rcu_head rcu; @@ -750,6 +755,9 @@ extern struct bond_parm_tbl ad_select_tbl[]; /* exported from bond_netlink.c */ extern struct rtnl_link_ops bond_link_ops; +/* exported from bond_sysfs_slave.c */ +extern const struct sysfs_ops slave_sysfs_ops; + static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb) { atomic_long_inc(&dev->tx_dropped); -- cgit v1.2.3 From 659fbdcf2f147010a7624f7eac04f4282814b013 Mon Sep 17 00:00:00 2001 From: Tom Seewald Date: Fri, 20 Nov 2020 13:25:28 -0600 Subject: cxgb4: Fix build failure when CONFIG_TLS=m After commit 9d2e5e9eeb59 ("cxgb4/ch_ktls: decrypted bit is not enough") whenever CONFIG_TLS=m and CONFIG_CHELSIO_T4=y, the following build failure occurs: ld: drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.o: in function `cxgb_select_queue': cxgb4_main.c:(.text+0x2dac): undefined reference to `tls_validate_xmit_skb' Fix this by ensuring that if TLS is set to be a module, CHELSIO_T4 will also be compiled as a module. As otherwise the cxgb4 driver will not be able to access TLS' symbols. 
Fixes: 9d2e5e9eeb59 ("cxgb4/ch_ktls: decrypted bit is not enough") Signed-off-by: Tom Seewald Link: https://lore.kernel.org/r/20201120192528.615-1-tseewald@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig index 87cc0ef68b31..8ba0e08e5e64 100644 --- a/drivers/net/ethernet/chelsio/Kconfig +++ b/drivers/net/ethernet/chelsio/Kconfig @@ -68,7 +68,7 @@ config CHELSIO_T3 config CHELSIO_T4 tristate "Chelsio Communications T4/T5/T6 Ethernet support" - depends on PCI && (IPV6 || IPV6=n) + depends on PCI && (IPV6 || IPV6=n) && (TLS || TLS=n) select FW_LOADER select MDIO select ZLIB_DEFLATE -- cgit v1.2.3 From f33d9e2b48a34e1558b67a473a1fc1d6e793f93c Mon Sep 17 00:00:00 2001 From: Yves-Alexis Perez Date: Thu, 19 Nov 2020 18:24:39 +0100 Subject: usbnet: ipheth: fix connectivity with iOS 14 Starting with iOS 14 released in September 2020, connectivity using the personal hotspot USB tethering function of iOS devices is broken. Communication between the host and the device (for example ICMP traffic or DNS resolution using the DNS service running in the device itself) works fine, but communication to endpoints further away doesn't work. Investigation on the matter shows that no UDP and ICMP traffic from the tethered host is reaching the Internet at all. For TCP traffic there are exchanges between tethered host and server but packets are modified in transit leading to impossible communication. After some trials Matti Vuorela discovered that reducing the URB buffer size by two bytes restored the previous behavior. While a better solution might exist to fix the issue, since the protocol is not publicly documented and considering the small size of the fix, let's do that. Tested-by: Matti Vuorela Signed-off-by: Yves-Alexis Perez Link: https://lore.kernel.org/linux-usb/CAAn0qaXmysJ9vx3ZEMkViv_B19ju-_ExN8Yn_uSefxpjS6g4Lw@mail.gmail.com/ Link: https://github.com/libimobiledevice/libimobiledevice/issues/1038 Link: https://lore.kernel.org/r/20201119172439.94988-1-corsac@corsac.net Signed-off-by: Jakub Kicinski --- drivers/net/usb/ipheth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index b09b45382faf..207e59e74935 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -59,7 +59,7 @@ #define IPHETH_USBINTF_SUBCLASS 253 #define IPHETH_USBINTF_PROTO 1 -#define IPHETH_BUF_SIZE 1516 +#define IPHETH_BUF_SIZE 1514 #define IPHETH_IP_ALIGN 2 /* padding at front of URB */ #define IPHETH_TX_TIMEOUT (5 * HZ) -- cgit v1.2.3 From c5dab0941fcdc9664eb0ec0d4d51433216d91336 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Fri, 20 Nov 2020 11:06:57 +0100 Subject: net/af_iucv: set correct sk_protocol for child sockets Child sockets erroneously inherit their parent's sk_type (ie. SOCK_*), instead of the PF_IUCV protocol that the parent was created with in iucv_sock_create(). We're currently not using sk->sk_protocol ourselves, so this shouldn't have much impact (except eg. getting the output in skb_dump() right). 
Fixes: eac3731bd04c ("[S390]: Add AF_IUCV socket support") Signed-off-by: Julian Wiedmann Link: https://lore.kernel.org/r/20201120100657.34407-1-jwi@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/iucv/af_iucv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 047238f01ba6..db7d888914fa 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1645,7 +1645,7 @@ static int iucv_callback_connreq(struct iucv_path *path, } /* Create the new socket */ - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); + nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0); if (!nsk) { err = pr_iucv->path_sever(path, user_data); iucv_path_free(path); @@ -1851,7 +1851,7 @@ static int afiucv_hs_callback_syn(struct sock *sk, struct sk_buff *skb) goto out; } - nsk = iucv_sock_alloc(NULL, sk->sk_type, GFP_ATOMIC, 0); + nsk = iucv_sock_alloc(NULL, sk->sk_protocol, GFP_ATOMIC, 0); bh_lock_sock(sk); if ((sk->sk_state != IUCV_LISTEN) || sk_acceptq_is_full(sk) || -- cgit v1.2.3 From 5aac0390a63b8718237a61dd0d24a29201d1c94a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 Nov 2020 07:59:54 -0700 Subject: tun: honor IOCB_NOWAIT flag tun only checks the file O_NONBLOCK flag, but it should also be checking the iocb IOCB_NOWAIT flag. Any fops using ->read/write_iter() should check both, otherwise it breaks users that correctly expect O_NONBLOCK semantics if IOCB_NOWAIT is set. Signed-off-by: Jens Axboe Link: https://lore.kernel.org/r/e9451860-96cc-c7c7-47b8-fe42cadd5f4c@kernel.dk Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index be69d272052f..cd06cae76035 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1961,12 +1961,15 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from) struct tun_file *tfile = file->private_data; struct tun_struct *tun = tun_get(tfile); ssize_t result; + int noblock = 0; if (!tun) return -EBADFD; - result = tun_get_user(tun, tfile, NULL, from, - file->f_flags & O_NONBLOCK, false); + if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) + noblock = 1; + + result = tun_get_user(tun, tfile, NULL, from, noblock, false); tun_put(tun); return result; @@ -2185,10 +2188,15 @@ static ssize_t tun_chr_read_iter(struct kiocb *iocb, struct iov_iter *to) struct tun_file *tfile = file->private_data; struct tun_struct *tun = tun_get(tfile); ssize_t len = iov_iter_count(to), ret; + int noblock = 0; if (!tun) return -EBADFD; - ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK, NULL); + + if ((file->f_flags & O_NONBLOCK) || (iocb->ki_flags & IOCB_NOWAIT)) + noblock = 1; + + ret = tun_do_read(tun, tfile, to, noblock, NULL); ret = min_t(ssize_t, ret, len); if (ret > 0) iocb->ki_pos = ret; -- cgit v1.2.3 From 8393597579f5250636f1cff157ea73f402b6501e Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Fri, 20 Nov 2020 16:40:11 -0600 Subject: ibmvnic: fix call_netdevice_notifiers in do_reset When netdev_notify_peers was substituted in commit 986103e7920c ("net/ibmvnic: Fix RTNL deadlock during device reset"), call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev) was missed. Fix it now. 
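For context, netdev_notify_peers() is roughly a wrapper that takes the RTNL lock and fires both notifier events, which is why open-coding only one of them loses the IGMP rejoin:

    /* approximate shape of netdev_notify_peers() in net/core/dev.c */
    void netdev_notify_peers(struct net_device *dev)
    {
            rtnl_lock();
            call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
            call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
            rtnl_unlock();
    }

Since do_reset() already runs with RTNL held, the driver calls the two notifiers directly instead of using the wrapper.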
Fixes: 986103e7920c ("net/ibmvnic: Fix RTNL deadlock during device reset") Signed-off-by: Lijun Pan Reviewed-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index da15913879f8..eface3543b2c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2074,8 +2074,10 @@ static int do_reset(struct ibmvnic_adapter *adapter, for (i = 0; i < adapter->req_rx_queues; i++) napi_schedule(&adapter->napi[i]); - if (adapter->reset_reason != VNIC_RESET_FAILOVER) + if (adapter->reset_reason != VNIC_RESET_FAILOVER) { call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev); + call_netdevice_notifiers(NETDEV_RESEND_IGMP, netdev); + } rc = 0; -- cgit v1.2.3 From 98025bce3a6200a0c4637272a33b5913928ba5b8 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Fri, 20 Nov 2020 16:40:12 -0600 Subject: ibmvnic: notify peers when failover and migration happen Commit 61d3e1d9bc2a ("ibmvnic: Remove netdev notify for failover resets") excluded the failover case for notify call because it said netdev_notify_peers() can cause network traffic to stall or halt. Current testing does not show network traffic stall or halt because of the notify call for failover event. netdev_notify_peers may be used when a device wants to inform the rest of the network about some sort of a reconfiguration such as failover or migration. It is unnecessary to call that in other events like FATAL, NON_FATAL, CHANGE_PARAM, and TIMEOUT resets since in those scenarios the hardware does not change. If the driver must do a hard reset, it is necessary to notify peers. Fixes: 61d3e1d9bc2a ("ibmvnic: Remove netdev notify for failover resets") Suggested-by: Brian King Suggested-by: Pradeep Satyanarayana Signed-off-by: Dany Madden Signed-off-by: Lijun Pan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index eface3543b2c..9665532a9ed2 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2074,7 +2074,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, for (i = 0; i < adapter->req_rx_queues; i++) napi_schedule(&adapter->napi[i]); - if (adapter->reset_reason != VNIC_RESET_FAILOVER) { + if (adapter->reset_reason == VNIC_RESET_FAILOVER || + adapter->reset_reason == VNIC_RESET_MOBILITY) { call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev); call_netdevice_notifiers(NETDEV_RESEND_IGMP, netdev); } @@ -2147,6 +2148,9 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter, if (rc) return IBMVNIC_OPEN_FAILED; + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, netdev); + call_netdevice_notifiers(NETDEV_RESEND_IGMP, netdev); + return 0; } -- cgit v1.2.3 From 855a631a4c11458a9cef1ab79c1530436aa95fae Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Fri, 20 Nov 2020 16:40:13 -0600 Subject: ibmvnic: skip tx timeout reset while in resetting Sometimes it takes longer than 5 seconds (watchdog timeout) to complete failover, migration, and other resets. In stead of scheduling another timeout reset, we wait for the current one to complete. 
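A sketch of the resulting guard in the watchdog handler, with hypothetical foo_* names standing in for the driver's own:

    static void foo_tx_timeout(struct net_device *dev, unsigned int txqueue)
    {
            struct foo_adapter *adapter = netdev_priv(dev);

            /* a failover/migration/other reset is already in flight;
             * let it finish rather than queueing a competing reset
             */
            if (test_bit(0, &adapter->resetting))
                    return;

            foo_schedule_reset(adapter, FOO_RESET_TIMEOUT);
    }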
Suggested-by: Brian King Signed-off-by: Lijun Pan Reviewed-by: Dany Madden Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 9665532a9ed2..2aa40b2f225c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2356,6 +2356,12 @@ static void ibmvnic_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ibmvnic_adapter *adapter = netdev_priv(dev); + if (test_bit(0, &adapter->resetting)) { + netdev_err(adapter->netdev, + "Adapter is resetting, skip timeout reset\n"); + return; + } + ibmvnic_reset(adapter, VNIC_RESET_TIMEOUT); } -- cgit v1.2.3 From 01770a166165738a6e05c3d911fb4609cc4eb416 Mon Sep 17 00:00:00 2001 From: Ricardo Dias Date: Fri, 20 Nov 2020 11:11:33 +0000 Subject: tcp: fix race condition when creating child sockets from syncookies When the TCP stack is in SYN flood mode, the server child socket is created from the SYN cookie received in a TCP packet with the ACK flag set. The child socket is created when the server receives the first TCP packet with a valid SYN cookie from the client. Usually, this packet corresponds to the final step of the TCP 3-way handshake, the ACK packet. But is also possible to receive a valid SYN cookie from the first TCP data packet sent by the client, and thus create a child socket from that SYN cookie. Since a client socket is ready to send data as soon as it receives the SYN+ACK packet from the server, the client can send the ACK packet (sent by the TCP stack code), and the first data packet (sent by the userspace program) almost at the same time, and thus the server will equally receive the two TCP packets with valid SYN cookies almost at the same instant. When such event happens, the TCP stack code has a race condition that occurs between the momement a lookup is done to the established connections hashtable to check for the existence of a connection for the same client, and the moment that the child socket is added to the established connections hashtable. As a consequence, this race condition can lead to a situation where we add two child sockets to the established connections hashtable and deliver two sockets to the userspace program to the same client. This patch fixes the race condition by checking if an existing child socket exists for the same client when we are adding the second child socket to the established connections socket. If an existing child socket exists, we drop the packet and discard the second child socket to the same client. 
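The shape of the fix is the classic check-then-insert done under a single bucket lock, so that two softirqs racing on the same 4-tuple cannot both insert a child socket. A simplified sketch with illustrative helper names (bucket_contains()/bucket_add() are not kernel functions):

    spin_lock(&bucket->lock);
    found_dup = bucket_contains(bucket, key);   /* re-check under the lock */
    if (!found_dup)
            bucket_add(bucket, new_sk);         /* first child wins */
    spin_unlock(&bucket->lock);

    if (found_dup) {
            /* the losing path drops the just-created child socket */
            sock_put(new_sk);
            new_sk = NULL;
    }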
Signed-off-by: Ricardo Dias Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20201120111133.GA67501@rdias-suse-pc.lan Signed-off-by: Jakub Kicinski --- include/net/inet_hashtables.h | 5 +-- net/dccp/ipv4.c | 2 +- net/dccp/ipv6.c | 2 +- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/inet_hashtables.c | 68 ++++++++++++++++++++++++++++++++++++----- net/ipv4/tcp_ipv4.c | 15 +++++++-- net/ipv6/tcp_ipv6.c | 13 +++++++- 7 files changed, 91 insertions(+), 16 deletions(-) diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 92560974ea67..ca6a3ea9057e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -247,8 +247,9 @@ void inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long high_limit); int inet_hashinfo2_init_mod(struct inet_hashinfo *h); -bool inet_ehash_insert(struct sock *sk, struct sock *osk); -bool inet_ehash_nolisten(struct sock *sk, struct sock *osk); +bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk); +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, + bool *found_dup_sk); int __inet_hash(struct sock *sk, struct sock *osk); int inet_hash(struct sock *sk); void inet_unhash(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index bb3d70664dde..b0b6e6a4784e 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -427,7 +427,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); if (*own_req) ireq->ireq_opt = NULL; else diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index ef4ab28cfde0..78ee1b5acf1f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -533,7 +533,7 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, dccp_done(newsk); goto out; } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL); /* Clone pktoptions received with SYN, if we own the req */ if (*own_req && ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4148f5f78f31..f60869acbef0 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -787,7 +787,7 @@ static void reqsk_queue_hash_req(struct request_sock *req, timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); - inet_ehash_insert(req_to_sk(req), NULL); + inet_ehash_insert(req_to_sk(req), NULL, NULL); /* before letting lookups find us, make sure all req fields * are committed to memory and refcnt initialized. */ diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 8cbe74313f38..45fb450b4522 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -20,6 +20,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif #include #include #include @@ -508,10 +511,52 @@ static u32 inet_sk_port_offset(const struct sock *sk) inet->inet_dport); } -/* insert a socket into ehash, and eventually remove another one - * (The another one can be a SYN_RECV or TIMEWAIT +/* Searches for an exsiting socket in the ehash bucket list. + * Returns true if found, false otherwise. 
*/ -bool inet_ehash_insert(struct sock *sk, struct sock *osk) +static bool inet_ehash_lookup_by_sk(struct sock *sk, + struct hlist_nulls_head *list) +{ + const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num); + const int sdif = sk->sk_bound_dev_if; + const int dif = sk->sk_bound_dev_if; + const struct hlist_nulls_node *node; + struct net *net = sock_net(sk); + struct sock *esk; + + INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr); + + sk_nulls_for_each_rcu(esk, node, list) { + if (esk->sk_hash != sk->sk_hash) + continue; + if (sk->sk_family == AF_INET) { + if (unlikely(INET_MATCH(esk, net, acookie, + sk->sk_daddr, + sk->sk_rcv_saddr, + ports, dif, sdif))) { + return true; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (sk->sk_family == AF_INET6) { + if (unlikely(INET6_MATCH(esk, net, + &sk->sk_v6_daddr, + &sk->sk_v6_rcv_saddr, + ports, dif, sdif))) { + return true; + } + } +#endif + } + return false; +} + +/* Insert a socket into ehash, and eventually remove another one + * (The another one can be a SYN_RECV or TIMEWAIT) + * If an existing socket already exists, socket sk is not inserted, + * and sets found_dup_sk parameter to true. + */ +bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; struct hlist_nulls_head *list; @@ -530,16 +575,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk) if (osk) { WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); ret = sk_nulls_del_node_init_rcu(osk); + } else if (found_dup_sk) { + *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); + if (*found_dup_sk) + ret = false; } + if (ret) __sk_nulls_add_node_rcu(sk, list); + spin_unlock(lock); + return ret; } -bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) +bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) { - bool ok = inet_ehash_insert(sk, osk); + bool ok = inet_ehash_insert(sk, osk, found_dup_sk); if (ok) { sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); @@ -583,7 +635,7 @@ int __inet_hash(struct sock *sk, struct sock *osk) int err = 0; if (sk->sk_state != TCP_LISTEN) { - inet_ehash_nolisten(sk, osk); + inet_ehash_nolisten(sk, osk, NULL); return 0; } WARN_ON(!sk_unhashed(sk)); @@ -679,7 +731,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, tb = inet_csk(sk)->icsk_bind_hash; spin_lock_bh(&head->lock); if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { - inet_ehash_nolisten(sk, NULL); + inet_ehash_nolisten(sk, NULL, NULL); spin_unlock_bh(&head->lock); return 0; } @@ -758,7 +810,7 @@ ok: inet_bind_hash(sk, tb, port); if (sk_unhashed(sk)) { inet_sk(sk)->inet_sport = htons(port); - inet_ehash_nolisten(sk, (struct sock *)tw); + inet_ehash_nolisten(sk, (struct sock *)tw, NULL); } if (tw) inet_twsk_bind_unhash(tw, hinfo); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 25d35a56ceba..c95fcdfeed42 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1499,6 +1499,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, bool *own_req) { struct inet_request_sock *ireq; + bool found_dup_sk = false; struct inet_sock *newinet; struct tcp_sock *newtp; struct sock *newsk; @@ -1576,12 +1577,22 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if 
(likely(*own_req)) { tcp_move_syn(newtp, req); ireq->ireq_opt = NULL; } else { - newinet->inet_opt = NULL; + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case only + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } else { + newinet->inet_opt = NULL; + } } return newsk; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3d49e8d0afee..d2502911b7fa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1193,6 +1193,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; + bool found_dup_sk = false; struct tcp_sock *newtp; struct sock *newsk; #ifdef CONFIG_TCP_MD5SIG @@ -1368,7 +1369,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * tcp_done(newsk); goto out; } - *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); + *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), + &found_dup_sk); if (*own_req) { tcp_move_syn(newtp, req); @@ -1383,6 +1385,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * skb_set_owner_r(newnp->pktoptions, newsk); } } + } else { + if (!req_unhash && found_dup_sk) { + /* This code path should only be executed in the + * syncookie case only + */ + bh_unlock_sock(newsk); + sock_put(newsk); + newsk = NULL; + } } return newsk; -- cgit v1.2.3 From 3fe356d58efae54dade9ec94ea7c919ed20cf4db Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Fri, 20 Nov 2020 11:47:36 +0100 Subject: vsock/virtio: discard packets only when socket is really closed Starting from commit 8692cefc433f ("virtio_vsock: Fix race condition in virtio_transport_recv_pkt"), we discard packets in virtio_transport_recv_pkt() if the socket has been released. When the socket is connected, we schedule a delayed work to wait the RST packet from the other peer, also if SHUTDOWN_MASK is set in sk->sk_shutdown. This is done to complete the virtio-vsock shutdown algorithm, releasing the port assigned to the socket definitively only when the other peer has consumed all the packets. If we discard the RST packet received, the socket will be closed only when the VSOCK_CLOSE_TIMEOUT is reached. Sergio discovered the issue while running ab(1) HTTP benchmark using libkrun [1] and observing a latency increase with that commit. To avoid this issue, we discard packet only if the socket is really closed (SOCK_DONE flag is set). We also set SOCK_DONE in virtio_transport_release() when we don't need to wait any packets from the other peer (we didn't schedule the delayed work). In this case we remove the socket from the vsock lists, releasing the port assigned. 
[1] https://github.com/containers/libkrun Fixes: 8692cefc433f ("virtio_vsock: Fix race condition in virtio_transport_recv_pkt") Cc: justin.he@arm.com Reported-by: Sergio Lopez Tested-by: Sergio Lopez Signed-off-by: Stefano Garzarella Acked-by: Jia He Link: https://lore.kernel.org/r/20201120104736.73749-1-sgarzare@redhat.com Signed-off-by: Jakub Kicinski --- net/vmw_vsock/virtio_transport_common.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 0edda1edf988..5956939eebb7 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -841,8 +841,10 @@ void virtio_transport_release(struct vsock_sock *vsk) virtio_transport_free_pkt(pkt); } - if (remove_sock) + if (remove_sock) { + sock_set_flag(sk, SOCK_DONE); vsock_remove_sock(vsk); + } } EXPORT_SYMBOL_GPL(virtio_transport_release); @@ -1132,8 +1134,8 @@ void virtio_transport_recv_pkt(struct virtio_transport *t, lock_sock(sk); - /* Check if sk has been released before lock_sock */ - if (sk->sk_shutdown == SHUTDOWN_MASK) { + /* Check if sk has been closed before lock_sock */ + if (sock_flag(sk, SOCK_DONE)) { (void)virtio_transport_reset_no_sock(t, pkt); release_sock(sk); sock_put(sk); -- cgit v1.2.3 From 2980cbd4dce7b1e9bf57df3ced43a7b184986f50 Mon Sep 17 00:00:00 2001 From: Sylwester Dziedziuch Date: Fri, 20 Nov 2020 10:06:40 -0800 Subject: i40e: Fix removing driver while bare-metal VFs pass traffic Prevent VFs from resetting when PF driver is being unloaded: - introduce new pf state: __I40E_VF_RESETS_DISABLED; - check if pf state has __I40E_VF_RESETS_DISABLED state set, if so, disable any further VFLR event notifications; - when i40e_remove (rmmod i40e) is called, disable any resets on the VFs; Previously if there were bare-metal VFs passing traffic and PF driver was removed, there was a possibility of VFs triggering a Tx timeout right before iavf_remove. This was causing iavf_close to not be called because there is a check in the beginning of iavf_remove that bails out early if adapter->state < IAVF_DOWN_PENDING. This makes it so some resources do not get cleaned up. 
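The resulting unload ordering, sketched with hypothetical foo_* names: disable VF resets first, tear down SR-IOV while the PF is still fully operational, and only then mark the PF as going down:

    static void foo_remove(struct pci_dev *pdev)
    {
            struct foo_pf *pf = pci_get_drvdata(pdev);

            if (pf->flags & FOO_FLAG_SRIOV_ENABLED) {
                    set_bit(FOO_VF_RESETS_DISABLED, pf->state);
                    foo_free_vfs(pf);
                    pf->flags &= ~FOO_FLAG_SRIOV_ENABLED;
            }

            /* no more scheduling of any task from here on */
            set_bit(FOO_SUSPENDED, pf->state);
            set_bit(FOO_DOWN, pf->state);
            /* ... remainder of the teardown ... */
    }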
Fixes: 6a9ddb36eeb8 ("i40e: disable IOV before freeing resources") Signed-off-by: Slawomir Laba Signed-off-by: Brett Creeley Signed-off-by: Sylwester Dziedziuch Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Link: https://lore.kernel.org/r/20201120180640.3654474-1-anthony.l.nguyen@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e.h | 1 + drivers/net/ethernet/intel/i40e/i40e_main.c | 22 ++++++++++++------ drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c | 26 +++++++++++++--------- 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 537300e762f0..d231a2cdd98f 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -140,6 +140,7 @@ enum i40e_state_t { __I40E_CLIENT_RESET, __I40E_VIRTCHNL_OP_PENDING, __I40E_RECOVERY_MODE, + __I40E_VF_RESETS_DISABLED, /* disable resets during i40e_remove */ /* This must be last as it determines the size of the BITMAP */ __I40E_STATE_SIZE__, }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index 4f8a2154b93f..1337686bd099 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -4010,8 +4010,16 @@ static irqreturn_t i40e_intr(int irq, void *data) } if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) { - ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; - set_bit(__I40E_VFLR_EVENT_PENDING, pf->state); + /* disable any further VFLR event notifications */ + if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state)) { + u32 reg = rd32(hw, I40E_PFINT_ICR0_ENA); + + reg &= ~I40E_PFINT_ICR0_VFLR_MASK; + wr32(hw, I40E_PFINT_ICR0_ENA, reg); + } else { + ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK; + set_bit(__I40E_VFLR_EVENT_PENDING, pf->state); + } } if (icr0 & I40E_PFINT_ICR0_GRST_MASK) { @@ -15311,6 +15319,11 @@ static void i40e_remove(struct pci_dev *pdev) while (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state)) usleep_range(1000, 2000); + if (pf->flags & I40E_FLAG_SRIOV_ENABLED) { + set_bit(__I40E_VF_RESETS_DISABLED, pf->state); + i40e_free_vfs(pf); + pf->flags &= ~I40E_FLAG_SRIOV_ENABLED; + } /* no more scheduling of any task */ set_bit(__I40E_SUSPENDED, pf->state); set_bit(__I40E_DOWN, pf->state); @@ -15337,11 +15350,6 @@ static void i40e_remove(struct pci_dev *pdev) */ i40e_notify_client_of_netdev_close(pf->vsi[pf->lan_vsi], false); - if (pf->flags & I40E_FLAG_SRIOV_ENABLED) { - i40e_free_vfs(pf); - pf->flags &= ~I40E_FLAG_SRIOV_ENABLED; - } - i40e_fdir_teardown(pf); /* If there is a switch structure or any orphans, remove them. diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 4919d22d7b6b..1b5390ec3d78 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -1403,7 +1403,8 @@ static void i40e_cleanup_reset_vf(struct i40e_vf *vf) * @vf: pointer to the VF structure * @flr: VFLR was issued or not * - * Returns true if the VF is reset, false otherwise. + * Returns true if the VF is in reset, resets successfully, or resets + * are disabled and false otherwise. 
**/ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) { @@ -1413,11 +1414,14 @@ bool i40e_reset_vf(struct i40e_vf *vf, bool flr) u32 reg; int i; + if (test_bit(__I40E_VF_RESETS_DISABLED, pf->state)) + return true; + /* If the VFs have been disabled, this means something else is * resetting the VF, so we shouldn't continue. */ if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) - return false; + return true; i40e_trigger_vf_reset(vf, flr); @@ -1581,6 +1585,15 @@ void i40e_free_vfs(struct i40e_pf *pf) i40e_notify_client_of_vf_enable(pf, 0); + /* Disable IOV before freeing resources. This lets any VF drivers + * running in the host get themselves cleaned up before we yank + * the carpet out from underneath their feet. + */ + if (!pci_vfs_assigned(pf->pdev)) + pci_disable_sriov(pf->pdev); + else + dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n"); + /* Amortize wait time by stopping all VFs at the same time */ for (i = 0; i < pf->num_alloc_vfs; i++) { if (test_bit(I40E_VF_STATE_INIT, &pf->vf[i].vf_states)) @@ -1596,15 +1609,6 @@ void i40e_free_vfs(struct i40e_pf *pf) i40e_vsi_wait_queues_disabled(pf->vsi[pf->vf[i].lan_vsi_idx]); } - /* Disable IOV before freeing resources. This lets any VF drivers - * running in the host get themselves cleaned up before we yank - * the carpet out from underneath their feet. - */ - if (!pci_vfs_assigned(pf->pdev)) - pci_disable_sriov(pf->pdev); - else - dev_warn(&pf->pdev->dev, "VFs are assigned - not disabling SR-IOV\n"); - /* free up VF resources */ tmp = pf->num_alloc_vfs; pf->num_alloc_vfs = 0; -- cgit v1.2.3 From d549699048b4b5c22dd710455bcdb76966e55aa3 Mon Sep 17 00:00:00 2001 From: Eyal Birger Date: Sat, 21 Nov 2020 08:28:17 +0200 Subject: net/packet: fix packet receive on L3 devices without visible hard header In the patchset merged by commit b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'") L3 devices which did not have header_ops were given one for the purpose of protocol parsing on af_packet transmit path. That change made af_packet receive path regard these devices as having a visible L3 header and therefore aligned incoming skb->data to point to the skb's mac_header. Some devices, such as ipip, xfrmi, and others, do not reset their mac_header prior to ingress and therefore their incoming packets became malformed. Ideally these devices would reset their mac headers, or af_packet would be able to rely on dev->hard_header_len being 0 for such cases, but it seems this is not the case. Fix by changing af_packet RX ll visibility criteria to include the existence of a '.create()' header operation, which is used when creating a device hard header - via dev_hard_header() - by upper layers, and does not exist in these L3 devices. As this predicate may be useful in other situations, add it as a common dev_has_header() helper in netdevice.h. Fixes: b9fcf0a0d826 ("Merge branch 'support-AF_PACKET-for-layer-3-devices'") Signed-off-by: Eyal Birger Acked-by: Jason A. 
Donenfeld Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20201121062817.3178900-1-eyal.birger@gmail.com Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 5 +++++ net/packet/af_packet.c | 18 +++++++++--------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 964b494b0e8d..fa275a054f46 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3137,6 +3137,11 @@ static inline bool dev_validate_header(const struct net_device *dev, return false; } +static inline bool dev_has_header(const struct net_device *dev) +{ + return dev->header_ops && dev->header_ops->create; +} + typedef int gifconf_func_t(struct net_device * dev, char __user * bufptr, int len, int size); int register_gifconf(unsigned int family, gifconf_func_t *gifconf); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index cefbd50c1090..7a18ffff8551 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -93,8 +93,8 @@ /* Assumptions: - - If the device has no dev->header_ops, there is no LL header visible - above the device. In this case, its hard_header_len should be 0. + - If the device has no dev->header_ops->create, there is no LL header + visible above the device. In this case, its hard_header_len should be 0. The device may prepend its own header internally. In this case, its needed_headroom should be set to the space needed for it to add its internal header. @@ -108,26 +108,26 @@ On receive: ----------- -Incoming, dev->header_ops != NULL +Incoming, dev_has_header(dev) == true mac_header -> ll header data -> data -Outgoing, dev->header_ops != NULL +Outgoing, dev_has_header(dev) == true mac_header -> ll header data -> ll header -Incoming, dev->header_ops == NULL +Incoming, dev_has_header(dev) == false mac_header -> data However drivers often make it point to the ll header. This is incorrect because the ll header should be invisible to us. data -> data -Outgoing, dev->header_ops == NULL +Outgoing, dev_has_header(dev) == false mac_header -> data. ll header is invisible to us. data -> data Resume - If dev->header_ops == NULL we are unable to restore the ll header, + If dev_has_header(dev) == false we are unable to restore the ll header, because it is invisible to us. @@ -2069,7 +2069,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, skb->dev = dev; - if (dev->header_ops) { + if (dev_has_header(dev)) { /* The device has an explicit notion of ll header, * exported to higher levels. * @@ -2198,7 +2198,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (!net_eq(dev_net(dev), sock_net(sk))) goto drop; - if (dev->header_ops) { + if (dev_has_header(dev)) { if (sk->sk_type != SOCK_DGRAM) skb_push(skb, skb->data - skb_mac_header(skb)); else if (skb->pkt_type == PACKET_OUTGOING) { -- cgit v1.2.3 From 9bd2702d292cb7b565b09e949d30288ab7a26d51 Mon Sep 17 00:00:00 2001 From: Lincoln Ramsay Date: Mon, 23 Nov 2020 21:40:43 +0000 Subject: aquantia: Remove the build_skb path When performing IPv6 forwarding, there is an expectation that SKBs will have some headroom. When forwarding a packet from the aquantia driver, this does not always happen, triggering a kernel warning. 
aq_ring.c has this code (edited slightly for brevity): if (buff->is_eop && buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) { skb = build_skb(aq_buf_vaddr(&buff->rxdata), AQ_CFG_RX_FRAME_MAX); } else { skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE); There is a significant difference between the SKB produced by these 2 code paths. When napi_alloc_skb creates an SKB, there is a certain amount of headroom reserved. However, this is not done in the build_skb codepath. As the hardware buffer that build_skb is built around does not handle the presence of the SKB header, this code path is being removed and the napi_alloc_skb path will always be used. This code path does have to copy the packet header into the SKB, but it adds the packet data as a frag. Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Signed-off-by: Lincoln Ramsay Link: https://lore.kernel.org/r/MWHPR1001MB23184F3EAFA413E0D1910EC9E8FC0@MWHPR1001MB2318.namprd10.prod.outlook.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 126 ++++++++++------------- 1 file changed, 52 insertions(+), 74 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 4f913658eea4..24122ccda614 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -413,85 +413,63 @@ int aq_ring_rx_clean(struct aq_ring_s *self, buff->rxdata.pg_off, buff->len, DMA_FROM_DEVICE); - /* for single fragment packets use build_skb() */ - if (buff->is_eop && - buff->len <= AQ_CFG_RX_FRAME_MAX - AQ_SKB_ALIGN) { - skb = build_skb(aq_buf_vaddr(&buff->rxdata), + skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE); + if (unlikely(!skb)) { + u64_stats_update_begin(&self->stats.rx.syncp); + self->stats.rx.skb_alloc_fails++; + u64_stats_update_end(&self->stats.rx.syncp); + err = -ENOMEM; + goto err_exit; + } + if (is_ptp_ring) + buff->len -= + aq_ptp_extract_ts(self->aq_nic, skb, + aq_buf_vaddr(&buff->rxdata), + buff->len); + + hdr_len = buff->len; + if (hdr_len > AQ_CFG_RX_HDR_SIZE) + hdr_len = eth_get_headlen(skb->dev, + aq_buf_vaddr(&buff->rxdata), + AQ_CFG_RX_HDR_SIZE); + + memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata), + ALIGN(hdr_len, sizeof(long))); + + if (buff->len - hdr_len > 0) { + skb_add_rx_frag(skb, 0, buff->rxdata.page, + buff->rxdata.pg_off + hdr_len, + buff->len - hdr_len, AQ_CFG_RX_FRAME_MAX); - if (unlikely(!skb)) { - u64_stats_update_begin(&self->stats.rx.syncp); - self->stats.rx.skb_alloc_fails++; - u64_stats_update_end(&self->stats.rx.syncp); - err = -ENOMEM; - goto err_exit; - } - if (is_ptp_ring) - buff->len -= - aq_ptp_extract_ts(self->aq_nic, skb, - aq_buf_vaddr(&buff->rxdata), - buff->len); - skb_put(skb, buff->len); page_ref_inc(buff->rxdata.page); - } else { - skb = napi_alloc_skb(napi, AQ_CFG_RX_HDR_SIZE); - if (unlikely(!skb)) { - u64_stats_update_begin(&self->stats.rx.syncp); - self->stats.rx.skb_alloc_fails++; - u64_stats_update_end(&self->stats.rx.syncp); - err = -ENOMEM; - goto err_exit; - } - if (is_ptp_ring) - buff->len -= - aq_ptp_extract_ts(self->aq_nic, skb, - aq_buf_vaddr(&buff->rxdata), - buff->len); - - hdr_len = buff->len; - if (hdr_len > AQ_CFG_RX_HDR_SIZE) - hdr_len = eth_get_headlen(skb->dev, - aq_buf_vaddr(&buff->rxdata), - AQ_CFG_RX_HDR_SIZE); - - memcpy(__skb_put(skb, hdr_len), aq_buf_vaddr(&buff->rxdata), - ALIGN(hdr_len, sizeof(long))); - - if (buff->len - hdr_len > 0) { - skb_add_rx_frag(skb, 0, buff->rxdata.page, - 
buff->rxdata.pg_off + hdr_len, - buff->len - hdr_len, - AQ_CFG_RX_FRAME_MAX); - page_ref_inc(buff->rxdata.page); - } + } - if (!buff->is_eop) { - buff_ = buff; - i = 1U; - do { - next_ = buff_->next, - buff_ = &self->buff_ring[next_]; + if (!buff->is_eop) { + buff_ = buff; + i = 1U; + do { + next_ = buff_->next; + buff_ = &self->buff_ring[next_]; - dma_sync_single_range_for_cpu( - aq_nic_get_dev(self->aq_nic), - buff_->rxdata.daddr, - buff_->rxdata.pg_off, - buff_->len, - DMA_FROM_DEVICE); - skb_add_rx_frag(skb, i++, - buff_->rxdata.page, - buff_->rxdata.pg_off, - buff_->len, - AQ_CFG_RX_FRAME_MAX); - page_ref_inc(buff_->rxdata.page); - buff_->is_cleaned = 1; - - buff->is_ip_cso &= buff_->is_ip_cso; - buff->is_udp_cso &= buff_->is_udp_cso; - buff->is_tcp_cso &= buff_->is_tcp_cso; - buff->is_cso_err |= buff_->is_cso_err; + dma_sync_single_range_for_cpu(aq_nic_get_dev(self->aq_nic), + buff_->rxdata.daddr, + buff_->rxdata.pg_off, + buff_->len, + DMA_FROM_DEVICE); + skb_add_rx_frag(skb, i++, + buff_->rxdata.page, + buff_->rxdata.pg_off, + buff_->len, + AQ_CFG_RX_FRAME_MAX); + page_ref_inc(buff_->rxdata.page); + buff_->is_cleaned = 1; - } while (!buff_->is_eop); - } + buff->is_ip_cso &= buff_->is_ip_cso; + buff->is_udp_cso &= buff_->is_udp_cso; + buff->is_tcp_cso &= buff_->is_tcp_cso; + buff->is_cso_err |= buff_->is_cso_err; + + } while (!buff_->is_eop); } if (buff->is_vlan) -- cgit v1.2.3 From 5204bb683c1633e550c2124ccc2358dd645a80db Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Mon, 23 Nov 2020 07:36:25 +0200 Subject: devlink: Fix reload stats structure Fix reload stats structure exposed to the user. Change stats structure hierarchy to have the reload action as a parent of the stat entry and then stat entry includes value per limit. This will also help to avoid string concatenation on iproute2 output. 
Reload stats structure before this fix: "stats": { "reload": { "driver_reinit": 2, "fw_activate": 1, "fw_activate_no_reset": 0 } } After this fix: "stats": { "reload": { "driver_reinit": { "unspecified": 2 }, "fw_activate": { "unspecified": 1, "no_reset": 0 } } Fixes: a254c264267e ("devlink: Add reload stats") Signed-off-by: Moshe Shemesh Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/1606109785-25197-1-git-send-email-moshe@mellanox.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/devlink.h | 2 ++ net/core/devlink.c | 49 +++++++++++++++++++++++++++++--------------- 2 files changed, 35 insertions(+), 16 deletions(-) diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index 0113bc4db9f5..5203f54a2be1 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -526,6 +526,8 @@ enum devlink_attr { DEVLINK_ATTR_RELOAD_STATS_LIMIT, /* u8 */ DEVLINK_ATTR_RELOAD_STATS_VALUE, /* u32 */ DEVLINK_ATTR_REMOTE_RELOAD_STATS, /* nested */ + DEVLINK_ATTR_RELOAD_ACTION_INFO, /* nested */ + DEVLINK_ATTR_RELOAD_ACTION_STATS, /* nested */ /* add new attributes above here, update the policy in devlink.c */ diff --git a/net/core/devlink.c b/net/core/devlink.c index 4b0211590aac..c91e15b7a2bd 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -517,7 +517,7 @@ devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_l return test_bit(limit, &devlink->ops->reload_limits); } -static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_action action, +static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_limit limit, u32 value) { struct nlattr *reload_stats_entry; @@ -526,8 +526,7 @@ static int devlink_reload_stat_put(struct sk_buff *msg, enum devlink_reload_acti if (!reload_stats_entry) return -EMSGSIZE; - if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, action) || - nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) || + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) || nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value)) goto nla_put_failure; nla_nest_end(msg, reload_stats_entry); @@ -540,7 +539,7 @@ nla_put_failure: static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote) { - struct nlattr *reload_stats_attr; + struct nlattr *reload_stats_attr, *act_info, *act_stats; int i, j, stat_idx; u32 value; @@ -552,17 +551,29 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink if (!reload_stats_attr) return -EMSGSIZE; - for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) { - /* Remote stats are shown even if not locally supported. Stats - * of actions with unspecified limit are shown though drivers - * don't need to register unspecified limit. 
- */ - if (!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC && - !devlink_reload_limit_is_supported(devlink, j)) + for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) { + if ((!is_remote && + !devlink_reload_action_is_supported(devlink, i)) || + i == DEVLINK_RELOAD_ACTION_UNSPEC) continue; - for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) { - if ((!is_remote && !devlink_reload_action_is_supported(devlink, i)) || - i == DEVLINK_RELOAD_ACTION_UNSPEC || + act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO); + if (!act_info) + goto nla_put_failure; + + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i)) + goto action_info_nest_cancel; + act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS); + if (!act_stats) + goto action_info_nest_cancel; + + for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) { + /* Remote stats are shown even if not locally supported. + * Stats of actions with unspecified limit are shown + * though drivers don't need to register unspecified + * limit. + */ + if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC && + !devlink_reload_limit_is_supported(devlink, j)) || devlink_reload_combination_is_invalid(i, j)) continue; @@ -571,13 +582,19 @@ static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink value = devlink->stats.reload_stats[stat_idx]; else value = devlink->stats.remote_reload_stats[stat_idx]; - if (devlink_reload_stat_put(msg, i, j, value)) - goto nla_put_failure; + if (devlink_reload_stat_put(msg, j, value)) + goto action_stats_nest_cancel; } + nla_nest_end(msg, act_stats); + nla_nest_end(msg, act_info); } nla_nest_end(msg, reload_stats_attr); return 0; +action_stats_nest_cancel: + nla_nest_cancel(msg, act_stats); +action_info_nest_cancel: + nla_nest_cancel(msg, act_info); nla_put_failure: nla_nest_cancel(msg, reload_stats_attr); return -EMSGSIZE; -- cgit v1.2.3 From 407c85c7ddd6b84d3cbdd2275616f70c27c17913 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 20 Nov 2020 19:47:44 -0800 Subject: tcp: Set ECT0 bit in tos/tclass for synack when BPF needs ECN When a BPF program is used to select between a type of TCP congestion control algorithm that uses either ECN or not there is a case where the synack for the frame was coming up without the ECT0 bit set. A bit of research found that this was due to the final socket being configured to dctcp while the listener socket was staying in cubic. To reproduce it all that is needed is to monitor TCP traffic while running the sample bpf program "samples/bpf/tcp_cong_kern.c". What is observed, assuming tcp_dctcp module is loaded or compiled in and the traffic matches the rules in the sample file, is that for all frames with the exception of the synack the ECT0 bit is set. To address that it is necessary to make one additional call to tcp_bpf_ca_needs_ecn using the request socket and then use the output of that to set the ECT0 bit for the tos/tclass of the packet. 
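As a reminder, the ECN field lives in the low two bits of tos/tclass, which is what the INET_ECN_is_capable()/INET_ECN_ECT_0 check manipulates. In essence (see include/net/inet_ecn.h):

    enum {
            INET_ECN_NOT_ECT = 0,
            INET_ECN_ECT_1   = 1,
            INET_ECN_ECT_0   = 2,
            INET_ECN_CE      = 3,
            INET_ECN_MASK    = 3,
    };

    static inline int INET_ECN_is_capable(__u8 dsfield)
    {
            return dsfield & INET_ECN_ECT_0;
    }

So the synack is marked ECT(0) only when the reflected tos/tclass does not already advertise ECN capability.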
Fixes: 91b5b21c7c16 ("bpf: Add support for changing congestion control") Signed-off-by: Alexander Duyck Link: https://lore.kernel.org/r/160593039663.2604.1374502006916871573.stgit@localhost.localdomain Signed-off-by: Jakub Kicinski --- net/ipv4/tcp_ipv4.c | 12 ++++++++---- net/ipv6/tcp_ipv6.c | 9 +++++++-- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index c95fcdfeed42..8391aa29e7a4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -980,13 +980,17 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); - tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos & ~INET_ECN_MASK : - inet_sk(sk)->tos; - if (skb) { __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); + tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? + tcp_rsk(req)->syn_tos & ~INET_ECN_MASK : + inet_sk(sk)->tos; + + if (!INET_ECN_is_capable(tos) && + tcp_bpf_ca_needs_ecn((struct sock *)req)) + tos |= INET_ECN_ECT_0; + rcu_read_lock(); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d2502911b7fa..992cbf3eb9e3 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -527,11 +527,16 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); - rcu_read_lock(); - opt = ireq->ipv6_opt; tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? tcp_rsk(req)->syn_tos & ~INET_ECN_MASK : np->tclass; + + if (!INET_ECN_is_capable(tclass) && + tcp_bpf_ca_needs_ecn((struct sock *)req)) + tclass |= INET_ECN_ECT_0; + + rcu_read_lock(); + opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, -- cgit v1.2.3 From bc40a3691f15c0728209cd0e2dc9e8e18854187f Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 23 Nov 2020 14:42:26 +0100 Subject: MAINTAINERS: Update page pool entry Add some file F: matches that is related to page_pool. 
Acked-by: Ilias Apalodimas Signed-off-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/160613894639.2826716.14635284017814375894.stgit@firesoul Signed-off-by: Jakub Kicinski --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index ac84b34ef2ad..d0d974f44655 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13161,7 +13161,9 @@ M: Jesper Dangaard Brouer M: Ilias Apalodimas L: netdev@vger.kernel.org S: Supported +F: Documentation/networking/page_pool.rst F: include/net/page_pool.h +F: include/trace/events/page_pool.h F: net/core/page_pool.c PANASONIC LAPTOP ACPI EXTRAS DRIVER -- cgit v1.2.3 From 078eb55cdf25e0a621d406c233cc1b4acc31c82f Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 23 Nov 2020 18:35:53 +0200 Subject: dpaa2-eth: Fix compile error due to missing devlink support The dpaa2 driver depends on devlink, so it should select NET_DEVLINK in order to fix compile errors, such as: drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.o: in function `dpaa2_eth_rx_err': dpaa2-eth.c:(.text+0x3cec): undefined reference to `devlink_trap_report' drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-devlink.o: in function `dpaa2_eth_dl_info_get': dpaa2-eth-devlink.c:(.text+0x160): undefined reference to `devlink_info_driver_name_put' Fixes: ceeb03ad8e22 ("dpaa2-eth: add basic devlink support") Signed-off-by: Ezequiel Garcia Signed-off-by: Ioana Ciornei Link: https://lore.kernel.org/r/20201123163553.1666476-1-ciorneiioana@gmail.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/dpaa2/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig index c0e05f71826d..ee7a906e30b3 100644 --- a/drivers/net/ethernet/freescale/dpaa2/Kconfig +++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig @@ -5,6 +5,7 @@ config FSL_DPAA2_ETH select PHYLINK select PCS_LYNX select FSL_XGMAC_MDIO + select NET_DEVLINK help This is the DPAA2 Ethernet driver supporting Freescale SoCs with DPAA2 (DataPath Acceleration Architecture v2). -- cgit v1.2.3 From d8f0a86795c69f5b697f7d9e5274c124da93c92d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 23 Nov 2020 17:23:51 +0100 Subject: nfc: s3fwrn5: use signed integer for parsing GPIO numbers GPIOs - as returned by of_get_named_gpio() and used by the gpiolib - are signed integers, where negative number indicates error. The return value of of_get_named_gpio() should not be assigned to an unsigned int because in case of !CONFIG_GPIOLIB such number would be a valid GPIO. 
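The intended usage pattern is to keep the raw return value signed and check it before treating it as a GPIO number; the property name below is illustrative:

    int gpio;

    gpio = of_get_named_gpio(np, "en-gpios", 0);
    if (gpio < 0)
            return gpio;    /* e.g. -EPROBE_DEFER or -ENOENT, not a GPIO */

    /* only a non-negative value may be used as a GPIO number from here on */

Storing the result in an unsigned int defeats the "< 0" check, letting an error value masquerade as a usable GPIO number.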
Fixes: c04c674fadeb ("nfc: s3fwrn5: Add driver for Samsung S3FWRN5 NFC Chip") Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20201123162351.209100-1-krzk@kernel.org Signed-off-by: Jakub Kicinski --- drivers/nfc/s3fwrn5/i2c.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nfc/s3fwrn5/i2c.c b/drivers/nfc/s3fwrn5/i2c.c index dc995286be84..d0a3bd9ff3c3 100644 --- a/drivers/nfc/s3fwrn5/i2c.c +++ b/drivers/nfc/s3fwrn5/i2c.c @@ -26,8 +26,8 @@ struct s3fwrn5_i2c_phy { struct i2c_client *i2c_dev; struct nci_dev *ndev; - unsigned int gpio_en; - unsigned int gpio_fw_wake; + int gpio_en; + int gpio_fw_wake; struct mutex mutex; -- cgit v1.2.3 From 5b7022cf1dc0d721bd4b5f3bada05bd8ced82fe0 Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Mon, 23 Nov 2020 21:08:57 +0200 Subject: net: ena: handle bad request id in ena_netdev After request id is checked in validate_rx_req_id() its value is still used in the line rx_ring->free_ids[next_to_clean] = rx_ring->ena_bufs[i].req_id; even if it was found to be out-of-bound for the array free_ids. The patch moves the request id to an earlier stage in the napi routine and makes sure its value isn't used if it's found out-of-bounds. Fixes: 30623e1ed116 ("net: ena: avoid memory access violation by validating req_id properly") Signed-off-by: Ido Segev Signed-off-by: Shay Agroskin Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_eth_com.c | 3 ++ drivers/net/ethernet/amazon/ena/ena_netdev.c | 43 +++++++-------------------- 2 files changed, 14 insertions(+), 32 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c index ad30cacc1622..032ab9f20438 100644 --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -516,6 +516,7 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, { struct ena_com_rx_buf_info *ena_buf = &ena_rx_ctx->ena_bufs[0]; struct ena_eth_io_rx_cdesc_base *cdesc = NULL; + u16 q_depth = io_cq->q_depth; u16 cdesc_idx = 0; u16 nb_hw_desc; u16 i = 0; @@ -543,6 +544,8 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, do { ena_buf[i].len = cdesc->length; ena_buf[i].req_id = cdesc->req_id; + if (unlikely(ena_buf[i].req_id >= q_depth)) + return -EIO; if (++i >= nb_hw_desc) break; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index e8131dadc22c..574c2b5ba21e 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -789,24 +789,6 @@ static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) adapter->num_io_queues); } -static int validate_rx_req_id(struct ena_ring *rx_ring, u16 req_id) -{ - if (likely(req_id < rx_ring->ring_size)) - return 0; - - netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, - "Invalid rx req_id: %hu\n", req_id); - - u64_stats_update_begin(&rx_ring->syncp); - rx_ring->rx_stats.bad_req_id++; - u64_stats_update_end(&rx_ring->syncp); - - /* Trigger device reset */ - rx_ring->adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; - set_bit(ENA_FLAG_TRIGGER_RESET, &rx_ring->adapter->flags); - return -EFAULT; -} - /* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors) * @adapter: network interface device structure * @qid: queue index @@ -1356,15 +1338,10 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info; u16 len, req_id, buf = 0; void *va; - int rc; len = ena_bufs[buf].len; req_id = 
ena_bufs[buf].req_id; - rc = validate_rx_req_id(rx_ring, req_id); - if (unlikely(rc < 0)) - return NULL; - rx_info = &rx_ring->rx_buffer_info[req_id]; if (unlikely(!rx_info->page)) { @@ -1440,10 +1417,6 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, len = ena_bufs[buf].len; req_id = ena_bufs[buf].req_id; - rc = validate_rx_req_id(rx_ring, req_id); - if (unlikely(rc < 0)) - return NULL; - rx_info = &rx_ring->rx_buffer_info[req_id]; } while (1); @@ -1697,12 +1670,18 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, error: adapter = netdev_priv(rx_ring->netdev); - u64_stats_update_begin(&rx_ring->syncp); - rx_ring->rx_stats.bad_desc_num++; - u64_stats_update_end(&rx_ring->syncp); + if (rc == -ENOSPC) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_desc_num++; + u64_stats_update_end(&rx_ring->syncp); + adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; + } else { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_req_id++; + u64_stats_update_end(&rx_ring->syncp); + adapter->reset_reason = ENA_REGS_RESET_INV_RX_REQ_ID; + } - /* Too many desc from the device. Trigger reset */ - adapter->reset_reason = ENA_REGS_RESET_TOO_MANY_RX_DESCS; set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); return 0; -- cgit v1.2.3 From 09323b3bca95181c0da79daebc8b0603e500f573 Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Mon, 23 Nov 2020 21:08:58 +0200 Subject: net: ena: set initial DMA width to avoid intel iommu issue The ENA driver uses the readless mechanism, which uses DMA, to find out what the DMA mask is supposed to be. If DMA is used without setting the dma_mask first, it causes the Intel IOMMU driver to think that ENA is a 32-bit device and therefore disables IOMMU passthrough permanently. This patch sets the dma_mask to be ENA_MAX_PHYS_ADDR_SIZE_BITS=48 before readless initialization in ena_device_init()->ena_com_mmio_reg_read_request_init(), which is large enough to workaround the intel_iommu issue. DMA mask is set again to the correct value after it's received from the device after readless is initialized. The patch also changes the driver to use dma_set_mask_and_coherent() function instead of the two pci_set_dma_mask() and pci_set_consistent_dma_mask() ones. Both methods achieve the same effect. 
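For context, a compact sketch of the probe-time ordering described above (the function name and error handling are illustrative; only dma_set_mask_and_coherent() and the 48-bit value come from the patch):

    #include <linux/dma-mapping.h>
    #include <linux/pci.h>

    /* Widen the DMA mask before any readless (DMA-based) register access,
     * then narrow it to the width the device actually reports.
     */
    static int example_init_dma(struct pci_dev *pdev, int reported_dma_width)
    {
            int rc;

            rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48));
            if (rc)
                    return rc;

            /* ... readless register read setup runs here, using DMA ... */

            return dma_set_mask_and_coherent(&pdev->dev,
                                             DMA_BIT_MASK(reported_dma_width));
    }
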
Fixes: 1738cd3ed342 ("net: ena: Add a driver for Amazon Elastic Network Adapters (ENA)") Signed-off-by: Mike Cui Signed-off-by: Arthur Kiyanovski Signed-off-by: Shay Agroskin Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index 574c2b5ba21e..ec0008ba7751 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -3367,16 +3367,9 @@ static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, goto err_mmio_read_less; } - rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + rc = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(dma_width)); if (rc) { - dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc); - goto err_mmio_read_less; - } - - rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); - if (rc) { - dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n", - rc); + dev_err(dev, "dma_set_mask_and_coherent failed %d\n", rc); goto err_mmio_read_less; } @@ -4146,6 +4139,12 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return rc; } + rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(ENA_MAX_PHYS_ADDR_SIZE_BITS)); + if (rc) { + dev_err(&pdev->dev, "dma_set_mask_and_coherent failed %d\n", rc); + goto err_disable_device; + } + pci_set_master(pdev); ena_dev = vzalloc(sizeof(*ena_dev)); -- cgit v1.2.3 From 1396d3148bd250db880573f9ed0abe5d6fba1fce Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Mon, 23 Nov 2020 21:08:59 +0200 Subject: net: ena: fix packet's addresses for rx_offset feature This patch fixes two lines in which the rx_offset received by the device wasn't taken into account: - prefetch function: In our driver the copied data would reside in rx_info->page + rx_headroom + rx_offset so the prefetch function is changed accordingly. - setting page_offset to zero for descriptors > 1: for every descriptor but the first, the rx_offset is zero. Hence the page_offset value should be set to rx_headroom. The previous implementation changed the value of rx_info after the descriptor was added to the SKB (essentially providing wrong page offset). 
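As a worked illustration of the address math above (the helper and its names are made up; only the headroom-plus-offset layout comes from the patch description):

    #include <linux/mm.h>

    /* The first descriptor's payload starts after both the headroom and the
     * device-reported packet offset; every later descriptor starts after the
     * headroom alone, which is why page_offset must be reset on refill.
     */
    static void *example_rx_data_ptr(struct page *page, unsigned int headroom,
                                     unsigned int pkt_offset, bool first_desc)
    {
            unsigned int off = headroom + (first_desc ? pkt_offset : 0);

            return page_address(page) + off;
    }
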
Fixes: 68f236df93a9 ("net: ena: add support for the rx offset feature") Signed-off-by: Shay Agroskin Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/amazon/ena/ena_netdev.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index ec0008ba7751..df1884d57d1a 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -908,10 +908,14 @@ static void ena_free_all_io_rx_resources(struct ena_adapter *adapter) static int ena_alloc_rx_page(struct ena_ring *rx_ring, struct ena_rx_buffer *rx_info, gfp_t gfp) { + int headroom = rx_ring->rx_headroom; struct ena_com_buf *ena_buf; struct page *page; dma_addr_t dma; + /* restore page offset value in case it has been changed by device */ + rx_info->page_offset = headroom; + /* if previous allocated page is not used */ if (unlikely(rx_info->page)) return 0; @@ -941,10 +945,9 @@ static int ena_alloc_rx_page(struct ena_ring *rx_ring, "Allocate page %p, rx_info %p\n", page, rx_info); rx_info->page = page; - rx_info->page_offset = 0; ena_buf = &rx_info->ena_buf; - ena_buf->paddr = dma + rx_ring->rx_headroom; - ena_buf->len = ENA_PAGE_SIZE - rx_ring->rx_headroom; + ena_buf->paddr = dma + headroom; + ena_buf->len = ENA_PAGE_SIZE - headroom; return 0; } @@ -1356,7 +1359,8 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, /* save virt address of first buffer */ va = page_address(rx_info->page) + rx_info->page_offset; - prefetch(va + NET_IP_ALIGN); + + prefetch(va); if (len <= rx_ring->rx_copybreak) { skb = ena_alloc_skb(rx_ring, false); @@ -1397,8 +1401,6 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, rx_info->page_offset, len, ENA_PAGE_SIZE); - /* The offset is non zero only for the first buffer */ - rx_info->page_offset = 0; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "RX skb updated. len %d. data_len %d\n", @@ -1517,8 +1519,7 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp) int ret; rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; - xdp->data = page_address(rx_info->page) + - rx_info->page_offset + rx_ring->rx_headroom; + xdp->data = page_address(rx_info->page) + rx_info->page_offset; xdp_set_data_meta_invalid(xdp); xdp->data_hard_start = page_address(rx_info->page); xdp->data_end = xdp->data + rx_ring->ena_bufs[0].len; @@ -1585,8 +1586,9 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, if (unlikely(ena_rx_ctx.descs == 0)) break; + /* First descriptor might have an offset set by the device */ rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id]; - rx_info->page_offset = ena_rx_ctx.pkt_offset; + rx_info->page_offset += ena_rx_ctx.pkt_offset; netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", -- cgit v1.2.3 From a0faaa27c71608799e0dd765c5af38a089091802 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Mon, 23 Nov 2020 13:35:45 -0600 Subject: ibmvnic: fix NULL pointer dereference in reset_sub_crq_queues adapter->tx_scrq and adapter->rx_scrq could be NULL if the previous reset did not complete after freeing sub crqs. Check for NULL before dereferencing them. Snippet of call trace: ibmvnic 30000006 env6: Releasing sub-CRQ ibmvnic 30000006 env6: Releasing CRQ ... 
ibmvnic 30000006 env6: Got Control IP offload Response ibmvnic 30000006 env6: Re-setting tx_scrq[0] BUG: Kernel NULL pointer dereference on read at 0x00000000 Faulting instruction address: 0xc008000003dea7cc Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: rpadlpar_io rpaphp xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables xsk_diag tcp_diag udp_diag raw_diag inet_diag unix_diag af_packet_diag netlink_diag tun bridge stp llc rfkill sunrpc pseries_rng xts vmx_crypto uio_pdrv_genirq uio binfmt_misc ip_tables xfs libcrc32c sd_mod t10_pi sg ibmvscsi ibmvnic ibmveth scsi_transport_srp dm_mirror dm_region_hash dm_log dm_mod CPU: 80 PID: 1856 Comm: kworker/80:2 Tainted: G W 5.8.0+ #4 Workqueue: events __ibmvnic_reset [ibmvnic] NIP: c008000003dea7cc LR: c008000003dea7bc CTR: 0000000000000000 REGS: c0000007ef7db860 TRAP: 0380 Tainted: G W (5.8.0+) MSR: 800000000280b033 CR: 28002422 XER: 0000000d CFAR: c000000000bd9520 IRQMASK: 0 GPR00: c008000003dea7bc c0000007ef7dbaf0 c008000003df7400 c0000007fa26ec00 GPR04: c0000007fcd0d008 c0000007fcd96350 0000000000000027 c0000007fcd0d010 GPR08: 0000000000000023 0000000000000000 0000000000000000 0000000000000000 GPR12: 0000000000002000 c00000001ec18e00 c0000000001982f8 c0000007bad6e840 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 fffffffffffffef7 GPR24: 0000000000000402 c0000007fa26f3a8 0000000000000003 c00000016f8ec048 GPR28: 0000000000000000 0000000000000000 0000000000000000 c0000007fa26ec00 NIP [c008000003dea7cc] ibmvnic_reset_init+0x15c/0x258 [ibmvnic] LR [c008000003dea7bc] ibmvnic_reset_init+0x14c/0x258 [ibmvnic] Call Trace: [c0000007ef7dbaf0] [c008000003dea7bc] ibmvnic_reset_init+0x14c/0x258 [ibmvnic] (unreliable) [c0000007ef7dbb80] [c008000003de8860] __ibmvnic_reset+0x408/0x970 [ibmvnic] [c0000007ef7dbc50] [c00000000018b7cc] process_one_work+0x2cc/0x800 [c0000007ef7dbd20] [c00000000018bd78] worker_thread+0x78/0x520 [c0000007ef7dbdb0] [c0000000001984c4] kthread+0x1d4/0x1e0 [c0000007ef7dbe20] [c00000000000cea8] ret_from_kernel_thread+0x5c/0x74 Fixes: 57a49436f4e8 ("ibmvnic: Reset sub-crqs during driver reset") Signed-off-by: Lijun Pan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 2aa40b2f225c..db4a8e17477c 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2880,6 +2880,9 @@ static int reset_sub_crq_queues(struct ibmvnic_adapter *adapter) { int i, rc; + if (!adapter->tx_scrq || !adapter->rx_scrq) + return -EINVAL; + for (i = 0; i < adapter->req_tx_queues; i++) { netdev_dbg(adapter->netdev, "Re-setting tx_scrq[%d]\n", i); rc = reset_one_sub_crq_queue(adapter, adapter->tx_scrq[i]); -- cgit v1.2.3 From 0e435befaea45f7ea58682eecab5e37e05b2ce65 Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Mon, 23 Nov 2020 13:35:46 -0600 Subject: ibmvnic: fix NULL pointer dereference in ibmvic_reset_crq crq->msgs could be NULL if the previous reset did not complete after freeing crq->msgs. Check for NULL before dereferencing them. Snippet of call trace: ... 
ibmvnic 30000003 env3 (unregistering): Releasing sub-CRQ ibmvnic 30000003 env3 (unregistering): Releasing CRQ BUG: Kernel NULL pointer dereference on read at 0x00000000 Faulting instruction address: 0xc0000000000c1a30 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: ibmvnic(E-) rpadlpar_io rpaphp xt_CHECKSUM xt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 nft_compat nft_counter nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nf_tables xsk_diag tcp_diag udp_diag tun raw_diag inet_diag unix_diag bridge af_packet_diag netlink_diag stp llc rfkill sunrpc pseries_rng xts vmx_crypto uio_pdrv_genirq uio binfmt_misc ip_tables xfs libcrc32c sd_mod t10_pi sg ibmvscsi ibmveth scsi_transport_srp dm_mirror dm_region_hash dm_log dm_mod [last unloaded: ibmvnic] CPU: 20 PID: 8426 Comm: kworker/20:0 Tainted: G E 5.10.0-rc1+ #12 Workqueue: events __ibmvnic_reset [ibmvnic] NIP: c0000000000c1a30 LR: c008000001b00c18 CTR: 0000000000000400 REGS: c00000000d05b7a0 TRAP: 0380 Tainted: G E (5.10.0-rc1+) MSR: 800000000280b033 CR: 44002480 XER: 20040000 CFAR: c0000000000c19ec IRQMASK: 0 GPR00: 0000000000000400 c00000000d05ba30 c008000001b17c00 0000000000000000 GPR04: 0000000000000000 0000000000000000 0000000000000000 00000000000001e2 GPR08: 000000000001f400 ffffffffffffd950 0000000000000000 c008000001b0b280 GPR12: c0000000000c19c8 c00000001ec72e00 c00000000019a778 c00000002647b440 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000006 0000000000000001 0000000000000003 0000000000000002 GPR24: 0000000000001000 c008000001b0d570 0000000000000005 c00000007ab5d550 GPR28: c00000007ab5c000 c000000032fcf848 c00000007ab5cc00 c000000032fcf800 NIP [c0000000000c1a30] memset+0x68/0x104 LR [c008000001b00c18] ibmvnic_reset_crq+0x70/0x110 [ibmvnic] Call Trace: [c00000000d05ba30] [0000000000000800] 0x800 (unreliable) [c00000000d05bab0] [c008000001b0a930] do_reset.isra.40+0x224/0x634 [ibmvnic] [c00000000d05bb80] [c008000001b08574] __ibmvnic_reset+0x17c/0x3c0 [ibmvnic] [c00000000d05bc50] [c00000000018d9ac] process_one_work+0x2cc/0x800 [c00000000d05bd20] [c00000000018df58] worker_thread+0x78/0x520 [c00000000d05bdb0] [c00000000019a934] kthread+0x1c4/0x1d0 [c00000000d05be20] [c00000000000d5d0] ret_from_kernel_thread+0x5c/0x6c Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol") Signed-off-by: Lijun Pan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index db4a8e17477c..26ba07df404e 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -4973,6 +4973,9 @@ static int ibmvnic_reset_crq(struct ibmvnic_adapter *adapter) } while (rc == H_BUSY || H_IS_LONG_BUSY(rc)); /* Clean out the queue */ + if (!crq->msgs) + return -EINVAL; + memset(crq->msgs, 0, PAGE_SIZE); crq->cur = 0; crq->active = false; -- cgit v1.2.3 From 3ada288150fb17ab3fcce2cf5fce20461f86b2ee Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Mon, 23 Nov 2020 13:35:47 -0600 Subject: ibmvnic: enhance resetting status check during module exit Based on the discussion with Sukadev Bhattiprolu and Dany Madden, we believe that checking adapter->resetting bit is preferred since RESETTING state flag is not as strict as resetting bit. RESETTING state flag is removed since it is verbose now. 
Fixes: 7d7195a026ba ("ibmvnic: Do not process device remove during device reset") Signed-off-by: Lijun Pan Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ibm/ibmvnic.c | 3 +-- drivers/net/ethernet/ibm/ibmvnic.h | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 26ba07df404e..2491ebc97871 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2215,7 +2215,6 @@ static void __ibmvnic_reset(struct work_struct *work) if (!saved_state) { reset_state = adapter->state; - adapter->state = VNIC_RESETTING; saved_state = true; } spin_unlock_irqrestore(&adapter->state_lock, flags); @@ -5280,7 +5279,7 @@ static int ibmvnic_remove(struct vio_dev *dev) unsigned long flags; spin_lock_irqsave(&adapter->state_lock, flags); - if (adapter->state == VNIC_RESETTING) { + if (test_bit(0, &adapter->resetting)) { spin_unlock_irqrestore(&adapter->state_lock, flags); return -EBUSY; } diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 217dcc7ded70..47a3fd71c96f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -942,8 +942,7 @@ enum vnic_state {VNIC_PROBING = 1, VNIC_CLOSING, VNIC_CLOSED, VNIC_REMOVING, - VNIC_REMOVED, - VNIC_RESETTING}; + VNIC_REMOVED}; enum ibmvnic_reset_reason {VNIC_RESET_FAILOVER = 1, VNIC_RESET_MOBILITY, -- cgit v1.2.3 From 6f7a1f9c1af30f1eadc0ad9e77ec8ee95c48b2c9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 24 Nov 2020 20:15:24 -0800 Subject: Documentation: netdev-FAQ: suggest how to post co-dependent series Make an explicit suggestion how to post user space side of kernel patches to avoid reposts when patchwork groups the wrong patches. v2: mention the cases unlike iproute2 explicitly Signed-off-by: Jakub Kicinski Reviewed-by: Florian Fainelli Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/netdev-FAQ.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 21537766be4d..4b9ed5874d5a 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -254,6 +254,32 @@ you will have done run-time testing specific to your change, but at a minimum, your changes should survive an ``allyesconfig`` and an ``allmodconfig`` build without new warnings or failures. +Q: How do I post corresponding changes to user space components? +---------------------------------------------------------------- +A: User space code exercising kernel features should be posted +alongside kernel patches. This gives reviewers a chance to see +how any new interface is used and how well it works. + +When user space tools reside in the kernel repo itself all changes +should generally come as one series. If series becomes too large +or the user space project is not reviewed on netdev include a link +to a public repo where user space patches can be seen. + +In case user space tooling lives in a separate repository but is +reviewed on netdev (e.g. 
patches to `iproute2` tools) kernel and +user space patches should form separate series (threads) when posted +to the mailing list, e.g.:: + + [PATCH net-next 0/3] net: some feature cover letter + └─ [PATCH net-next 1/3] net: some feature prep + └─ [PATCH net-next 2/3] net: some feature do it + └─ [PATCH net-next 3/3] selftest: net: some feature + + [PATCH iproute2-next] ip: add support for some feature + +Posting as one thread is discouraged because it confuses patchwork +(as of patchwork 2.2.2). + Q: Any other tips to help ensure my net/net-next patch gets OK'd? ----------------------------------------------------------------- A: Attention to detail. Re-read your own work as if you were the -- cgit v1.2.3 From e255e11e66da8281e337e4e352956e8a4999fca4 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Tue, 24 Nov 2020 15:17:28 +0800 Subject: ipv6: addrlabel: fix possible memory leak in ip6addrlbl_net_init kmemleak report a memory leak as follows: unreferenced object 0xffff8880059c6a00 (size 64): comm "ip", pid 23696, jiffies 4296590183 (age 1755.384s) hex dump (first 32 bytes): 20 01 00 10 00 00 00 00 00 00 00 00 00 00 00 00 ............... 1c 00 00 00 00 00 00 00 00 00 00 00 07 00 00 00 ................ backtrace: [<00000000aa4e7a87>] ip6addrlbl_add+0x90/0xbb0 [<0000000070b8d7f1>] ip6addrlbl_net_init+0x109/0x170 [<000000006a9ca9d4>] ops_init+0xa8/0x3c0 [<000000002da57bf2>] setup_net+0x2de/0x7e0 [<000000004e52d573>] copy_net_ns+0x27d/0x530 [<00000000b07ae2b4>] create_new_namespaces+0x382/0xa30 [<000000003b76d36f>] unshare_nsproxy_namespaces+0xa1/0x1d0 [<0000000030653721>] ksys_unshare+0x3a4/0x780 [<0000000007e82e40>] __x64_sys_unshare+0x2d/0x40 [<0000000031a10c08>] do_syscall_64+0x33/0x40 [<0000000099df30e7>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 We should free all rules when we catch an error in ip6addrlbl_net_init(). otherwise a memory leak will occur. Fixes: 2a8cc6c89039 ("[IPV6] ADDRCONF: Support RFC3484 configurable address selection policy table.") Reported-by: Hulk Robot Signed-off-by: Wang Hai Link: https://lore.kernel.org/r/20201124071728.8385-1-wanghai38@huawei.com Signed-off-by: Jakub Kicinski --- net/ipv6/addrlabel.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 642fc6ac13d2..8a22486cf270 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -306,7 +306,9 @@ static int ip6addrlbl_del(struct net *net, /* add default label */ static int __net_init ip6addrlbl_net_init(struct net *net) { - int err = 0; + struct ip6addrlbl_entry *p = NULL; + struct hlist_node *n; + int err; int i; ADDRLABEL(KERN_DEBUG "%s\n", __func__); @@ -315,14 +317,20 @@ static int __net_init ip6addrlbl_net_init(struct net *net) INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { - int ret = ip6addrlbl_add(net, - ip6addrlbl_init_table[i].prefix, - ip6addrlbl_init_table[i].prefixlen, - 0, - ip6addrlbl_init_table[i].label, 0); - /* XXX: should we free all rules when we catch an error? 
*/ - if (ret && (!err || err != -ENOMEM)) - err = ret; + err = ip6addrlbl_add(net, + ip6addrlbl_init_table[i].prefix, + ip6addrlbl_init_table[i].prefixlen, + 0, + ip6addrlbl_init_table[i].label, 0); + if (err) + goto err_ip6addrlbl_add; + } + return 0; + +err_ip6addrlbl_add: + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { + hlist_del_rcu(&p->list); + kfree_rcu(p, rcu); } return err; } -- cgit v1.2.3 From 12a8fe56c0f06eaab1f9d89d246c3591bcc7a966 Mon Sep 17 00:00:00 2001 From: Antonio Borneo Date: Tue, 24 Nov 2020 23:37:29 +0100 Subject: net: stmmac: fix incorrect merge of patch upstream Commit 757926247836 ("net: stmmac: add flexible PPS to dwmac 4.10a") was intended to modify the struct dwmac410_ops, but it got somehow badly merged and modified the struct dwmac4_ops. Revert the modification in struct dwmac4_ops and re-apply it properly in struct dwmac410_ops. Fixes: 757926247836 ("net: stmmac: add flexible PPS to dwmac 4.10a") Signed-off-by: Antonio Borneo Reported-by: Ahmad Fatoum Link: https://lore.kernel.org/r/20201124223729.886992-1-antonio.borneo@st.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index 002791b77356..ced6d76a0d85 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -1171,7 +1171,6 @@ const struct stmmac_ops dwmac4_ops = { .pcs_get_adv_lp = dwmac4_get_adv_lp, .debug = dwmac4_debug, .set_filter = dwmac4_set_filter, - .flex_pps_config = dwmac5_flex_pps_config, .set_mac_loopback = dwmac4_set_mac_loopback, .update_vlan_hash = dwmac4_update_vlan_hash, .sarc_configure = dwmac4_sarc_configure, @@ -1213,6 +1212,7 @@ const struct stmmac_ops dwmac410_ops = { .pcs_get_adv_lp = dwmac4_get_adv_lp, .debug = dwmac4_debug, .set_filter = dwmac4_set_filter, + .flex_pps_config = dwmac5_flex_pps_config, .set_mac_loopback = dwmac4_set_mac_loopback, .update_vlan_hash = dwmac4_update_vlan_hash, .sarc_configure = dwmac4_sarc_configure, -- cgit v1.2.3 From 2543a6000e593a213fba5b504f52c07e09f39835 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Nov 2020 12:38:22 -0800 Subject: gro_cells: reduce number of synchronize_net() calls After cited commit, gro_cells_destroy() became damn slow on hosts with a lot of cores. This is because we have one additional synchronize_net() per cpu as stated in the changelog. gro_cells_init() is setting NAPI_STATE_NO_BUSY_POLL, and this was enough to not have one synchronize_net() call per netif_napi_del() We can factorize all the synchronize_net() to a single one, right before freeing per-cpu memory. 
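For context, the relationship the fix relies on, in simplified form (not a verbatim copy of the core networking code): netif_napi_del() pays for a full grace period on every call, which is why the destroy loop switches to __netif_napi_del() and issues a single synchronize_net() once all cells are unlinked.

    /* Simplified: the driver-facing helper bundles the grace period. */
    void netif_napi_del(struct napi_struct *napi)
    {
            __netif_napi_del(napi);
            synchronize_net();      /* one grace period per deleted NAPI */
    }
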
Fixes: 5198d545dba8 ("net: remove napi_hash_del() from driver-facing API") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20201124203822.1360107-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski --- net/core/gro_cells.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index e095fb871d91..6eb2e5ec2c50 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -99,9 +99,14 @@ void gro_cells_destroy(struct gro_cells *gcells) struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); napi_disable(&cell->napi); - netif_napi_del(&cell->napi); + __netif_napi_del(&cell->napi); __skb_queue_purge(&cell->napi_skbs); } + /* This barrier is needed because netpoll could access dev->napi_list + * under rcu protection. + */ + synchronize_net(); + free_percpu(gcells->cells); gcells->cells = NULL; } -- cgit v1.2.3 From 90cf87d16bd566cff40c2bc8e32e6d4cd3af23f0 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Wed, 25 Nov 2020 00:02:59 +0200 Subject: enetc: Let the hardware auto-advance the taprio base-time of 0 The tc-taprio base time indicates the beginning of the tc-taprio schedule, which is cyclic by definition (where the length of the cycle in nanoseconds is called the cycle time). The base time is a 64-bit PTP time in the TAI domain. Logically, the base-time should be a future time. But that imposes some restrictions to user space, which has to retrieve the current PTP time from the NIC first, then calculate a base time that will still be larger than the base time by the time the kernel driver programs this value into the hardware. Actually ensuring that the programmed base time is in the future is still a problem even if the kernel alone deals with this. Luckily, the enetc hardware already advances a base-time that is in the past into a congruent time in the immediate future, according to the same formula that can be found in the software implementation of taprio (in taprio_get_start_time): /* Schedule the start time for the beginning of the next * cycle. */ n = div64_s64(ktime_sub_ns(now, base), cycle); *start = ktime_add_ns(base, (n + 1) * cycle); There's only one problem: the driver doesn't let the hardware do that. It interferes with the base-time passed from user space, by special-casing the situation when the base-time is zero, and replaces that with the current PTP time. This changes the intended effective base-time of the schedule, which will in the end have a different phase offset than if the base-time of 0.000000000 was to be advanced by an integer multiple of the cycle-time. 
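The advancement the hardware performs can be written out as a small helper (a sketch mirroring the formula quoted above, not the enetc code itself):

    #include <linux/ktime.h>
    #include <linux/math64.h>

    /* Move a past base time to the next congruent instant in the future,
     * preserving the schedule's phase relative to the original base time.
     */
    static ktime_t example_advance_base_time(ktime_t base, s64 cycle_ns,
                                             ktime_t now)
    {
            s64 n;

            if (ktime_after(base, now))
                    return base;    /* already in the future, use as-is */

            n = div64_s64(ktime_sub_ns(now, base), cycle_ns);
            return ktime_add_ns(base, (n + 1) * cycle_ns);
    }
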
Fixes: 34c6adf1977b ("enetc: Configure the Time-Aware Scheduler via tc-taprio offload") Signed-off-by: Vladimir Oltean Link: https://lore.kernel.org/r/20201124220259.3027991-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/freescale/enetc/enetc_qos.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c index 827f74e86d34..dbceb99c4441 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c @@ -92,18 +92,8 @@ static int enetc_setup_taprio(struct net_device *ndev, gcl_config->atc = 0xff; gcl_config->acl_len = cpu_to_le16(gcl_len); - if (!admin_conf->base_time) { - gcl_data->btl = - cpu_to_le32(enetc_rd(&priv->si->hw, ENETC_SICTR0)); - gcl_data->bth = - cpu_to_le32(enetc_rd(&priv->si->hw, ENETC_SICTR1)); - } else { - gcl_data->btl = - cpu_to_le32(lower_32_bits(admin_conf->base_time)); - gcl_data->bth = - cpu_to_le32(upper_32_bits(admin_conf->base_time)); - } - + gcl_data->btl = cpu_to_le32(lower_32_bits(admin_conf->base_time)); + gcl_data->bth = cpu_to_le32(upper_32_bits(admin_conf->base_time)); gcl_data->ct = cpu_to_le32(admin_conf->cycle_time); gcl_data->cte = cpu_to_le32(admin_conf->cycle_time_extension); -- cgit v1.2.3 From 3cb2e6d92be637b79d6ba0746d610a8dfcc0400b Mon Sep 17 00:00:00 2001 From: Min Li Date: Tue, 24 Nov 2020 21:58:35 -0500 Subject: ptp: clockmatrix: bug fix for idtcm_strverscmp Feed kstrtou8 with NULL terminated string. Changes since v1: -Use sscanf to get rid of adhoc string parse. Changes since v2: -Check if sscanf returns 3. Fixes: 7ea5fda2b132 ("ptp: ptp_clockmatrix: update to support 4.8.7 firmware") Signed-off-by: Min Li Link: https://lore.kernel.org/r/1606273115-25792-1-git-send-email-min.li.xe@renesas.com Signed-off-by: Jakub Kicinski --- drivers/ptp/ptp_clockmatrix.c | 49 ++++++++++++++----------------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c index e020faff7da5..663255774c0b 100644 --- a/drivers/ptp/ptp_clockmatrix.c +++ b/drivers/ptp/ptp_clockmatrix.c @@ -103,43 +103,26 @@ static int timespec_to_char_array(struct timespec64 const *ts, return 0; } -static int idtcm_strverscmp(const char *ver1, const char *ver2) +static int idtcm_strverscmp(const char *version1, const char *version2) { - u8 num1; - u8 num2; - int result = 0; - - /* loop through each level of the version string */ - while (result == 0) { - /* extract leading version numbers */ - if (kstrtou8(ver1, 10, &num1) < 0) - return -1; + u8 ver1[3], ver2[3]; + int i; - if (kstrtou8(ver2, 10, &num2) < 0) - return -1; + if (sscanf(version1, "%hhu.%hhu.%hhu", + &ver1[0], &ver1[1], &ver1[2]) != 3) + return -1; + if (sscanf(version2, "%hhu.%hhu.%hhu", + &ver2[0], &ver2[1], &ver2[2]) != 3) + return -1; - /* if numbers differ, then set the result */ - if (num1 < num2) - result = -1; - else if (num1 > num2) - result = 1; - else { - /* if numbers are the same, go to next level */ - ver1 = strchr(ver1, '.'); - ver2 = strchr(ver2, '.'); - if (!ver1 && !ver2) - break; - else if (!ver1) - result = -1; - else if (!ver2) - result = 1; - else { - ver1++; - ver2++; - } - } + for (i = 0; i < 3; i++) { + if (ver1[i] > ver2[i]) + return 1; + if (ver1[i] < ver2[i]) + return -1; } - return result; + + return 0; } static int idtcm_xfer_read(struct idtcm *idtcm, -- cgit v1.2.3 From 
b187c9b4178b87954dbc94e78a7094715794714f Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 25 Nov 2020 11:16:19 +0200 Subject: devlink: Hold rtnl lock while reading netdev attributes A netdevice of a devlink port can be moved to different net namespace than its parent devlink instance. This scenario occurs when devlink reload is not used. When netdevice is undergoing migration to net namespace, its ifindex and name may change. In such use case, devlink port query may read stale netdev attributes. Fix it by reading them under rtnl lock. Fixes: bfcd3a466172 ("Introduce devlink infrastructure") Signed-off-by: Parav Pandit Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/core/devlink.c b/net/core/devlink.c index c91e15b7a2bd..79b8df41bea3 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -772,6 +772,8 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) goto nla_put_failure; + /* Hold rtnl lock while accessing port's netdev attributes. */ + rtnl_lock(); spin_lock_bh(&devlink_port->type_lock); if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type)) goto nla_put_failure_type_locked; @@ -798,6 +800,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, goto nla_put_failure_type_locked; } spin_unlock_bh(&devlink_port->type_lock); + rtnl_unlock(); if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; if (devlink_nl_port_function_attrs_put(msg, devlink_port, extack)) @@ -808,6 +811,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, nla_put_failure_type_locked: spin_unlock_bh(&devlink_port->type_lock); + rtnl_unlock(); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; -- cgit v1.2.3 From a7b43649507dae4e55ff0087cad4e4dd1c6d5b99 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Wed, 25 Nov 2020 11:16:20 +0200 Subject: devlink: Make sure devlink instance and port are in same net namespace When devlink reload operation is not used, netdev of an Ethernet port may be present in different net namespace than the net namespace of the devlink instance. Ensure that both the devlink instance and devlink port netdev are located in same net namespace. Fixes: 070c63f20f6c ("net: devlink: allow to change namespaces during reload") Signed-off-by: Parav Pandit Signed-off-by: Jakub Kicinski --- net/core/devlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/core/devlink.c b/net/core/devlink.c index 79b8df41bea3..8c5ddffd707d 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -782,9 +782,10 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, devlink_port->desired_type)) goto nla_put_failure_type_locked; if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) { + struct net *net = devlink_net(devlink_port->devlink); struct net_device *netdev = devlink_port->type_dev; - if (netdev && + if (netdev && net_eq(net, dev_net(netdev)) && (nla_put_u32(msg, DEVLINK_ATTR_PORT_NETDEV_IFINDEX, netdev->ifindex) || nla_put_string(msg, DEVLINK_ATTR_PORT_NETDEV_NAME, -- cgit v1.2.3 From 025cc2fb6a4e84e9a0552c0017dcd1c24b7ac7da Mon Sep 17 00:00:00 2001 From: Maxim Mikityanskiy Date: Wed, 25 Nov 2020 14:18:10 -0800 Subject: net/tls: Protect from calling tls_dev_del for TLS RX twice tls_device_offload_cleanup_rx doesn't clear tls_ctx->netdev after calling tls_dev_del if TLX TX offload is also enabled. 
Clearing tls_ctx->netdev gets postponed until tls_device_gc_task. It leaves a time frame when tls_device_down may get called and call tls_dev_del for RX one extra time, confusing the driver, which may lead to a crash. This patch corrects this racy behavior by adding a flag to prevent tls_device_down from calling tls_dev_del the second time. Fixes: e8f69799810c ("net/tls: Add generic NIC offload infrastructure") Signed-off-by: Maxim Mikityanskiy Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20201125221810.69870-1-saeedm@nvidia.com Signed-off-by: Jakub Kicinski --- include/net/tls.h | 6 ++++++ net/tls/tls_device.c | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/include/net/tls.h b/include/net/tls.h index cf1473099453..2bdd802212fe 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -199,6 +199,12 @@ enum tls_context_flags { * to be atomic. */ TLS_TX_SYNC_SCHED = 1, + /* tls_dev_del was called for the RX side, device state was released, + * but tls_ctx->netdev might still be kept, because TX-side driver + * resources might not be released yet. Used to prevent the second + * tls_dev_del call in tls_device_down if it happens simultaneously. + */ + TLS_RX_DEV_CLOSED = 2, }; struct cipher_context { diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 54d3e161d198..a3ab2d3d4e4e 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -1262,6 +1262,8 @@ void tls_device_offload_cleanup_rx(struct sock *sk) if (tls_ctx->tx_conf != TLS_HW) { dev_put(netdev); tls_ctx->netdev = NULL; + } else { + set_bit(TLS_RX_DEV_CLOSED, &tls_ctx->flags); } out: up_read(&device_offload_lock); @@ -1291,7 +1293,8 @@ static int tls_device_down(struct net_device *netdev) if (ctx->tx_conf == TLS_HW) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX); - if (ctx->rx_conf == TLS_HW) + if (ctx->rx_conf == TLS_HW && + !test_bit(TLS_RX_DEV_CLOSED, &ctx->flags)) netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_RX); WRITE_ONCE(ctx->netdev, NULL); -- cgit v1.2.3 From cbf3d60329c4e11edcecac0c8fc6767b0f05e3a7 Mon Sep 17 00:00:00 2001 From: Rohit Maheshwari Date: Wed, 25 Nov 2020 12:56:26 +0530 Subject: ch_ktls: lock is not freed Currently lock gets freed only if timeout expires, but missed a case when HW returns failure and goes for cleanup. Fixes: efca3878a5fb ("ch_ktls: Issue if connection offload fails") Signed-off-by: Rohit Maheshwari Link: https://lore.kernel.org/r/20201125072626.10861-1-rohitm@chelsio.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index c24485c0d512..7f90b828d159 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -544,7 +544,9 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk, /* need to wait for hw response, can't free tx_info yet. 
*/ if (tx_info->open_state == CH_KTLS_OPEN_PENDING) tx_info->pending_close = true; - /* free the lock after the cleanup */ + else + spin_unlock_bh(&tx_info->lock); + /* if in pending close, free the lock after the cleanup */ goto put_module; } spin_unlock_bh(&tx_info->lock); -- cgit v1.2.3 From 4ba1cb39fce4464151517a37ce0ac0a1a3f580d6 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Thu, 19 Nov 2020 14:03:17 +0100 Subject: can: gs_usb: fix endianess problem with candleLight firmware The firmware on the original USB2CAN by Geschwister Schneider Technologie Entwicklungs- und Vertriebs UG exchanges all data between the host and the device in host byte order. This is done with the struct gs_host_config::byte_order member, which is sent first to indicate the desired byte order. The widely used open source firmware candleLight doesn't support this feature and exchanges the data in little endian byte order. This breaks if a device with candleLight firmware is used on big endianess systems. To fix this problem, all u32 (but not the struct gs_host_frame::echo_id, which is a transparent cookie) are converted to __le32. Cc: Maximilian Schneider Cc: Hubert Denkmair Reported-by: Michael Rausch Link: https://lore.kernel.org/r/b58aace7-61f3-6df7-c6df-69fee2c66906@netadair.de Tested-by: Oleksij Rempel Fixes: d08e973a77d1 ("can: gs_usb: Added support for the GS_USB CAN devices") Link: https://lore.kernel.org/r/20201120103818.3386964-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/gs_usb.c | 131 +++++++++++++++++++++++-------------------- 1 file changed, 70 insertions(+), 61 deletions(-) diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 3005157059ca..018ca3b057a3 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -63,21 +63,27 @@ enum gs_can_identify_mode { }; /* data types passed between host and device */ + +/* The firmware on the original USB2CAN by Geschwister Schneider + * Technologie Entwicklungs- und Vertriebs UG exchanges all data + * between the host and the device in host byte order. This is done + * with the struct gs_host_config::byte_order member, which is sent + * first to indicate the desired byte order. + * + * The widely used open source firmware candleLight doesn't support + * this feature and exchanges the data in little endian byte order. + */ struct gs_host_config { - u32 byte_order; + __le32 byte_order; } __packed; -/* All data exchanged between host and device is exchanged in host byte order, - * thanks to the struct gs_host_config byte_order member, which is sent first - * to indicate the desired byte order. 
- */ struct gs_device_config { u8 reserved1; u8 reserved2; u8 reserved3; u8 icount; - u32 sw_version; - u32 hw_version; + __le32 sw_version; + __le32 hw_version; } __packed; #define GS_CAN_MODE_NORMAL 0 @@ -87,26 +93,26 @@ struct gs_device_config { #define GS_CAN_MODE_ONE_SHOT BIT(3) struct gs_device_mode { - u32 mode; - u32 flags; + __le32 mode; + __le32 flags; } __packed; struct gs_device_state { - u32 state; - u32 rxerr; - u32 txerr; + __le32 state; + __le32 rxerr; + __le32 txerr; } __packed; struct gs_device_bittiming { - u32 prop_seg; - u32 phase_seg1; - u32 phase_seg2; - u32 sjw; - u32 brp; + __le32 prop_seg; + __le32 phase_seg1; + __le32 phase_seg2; + __le32 sjw; + __le32 brp; } __packed; struct gs_identify_mode { - u32 mode; + __le32 mode; } __packed; #define GS_CAN_FEATURE_LISTEN_ONLY BIT(0) @@ -117,23 +123,23 @@ struct gs_identify_mode { #define GS_CAN_FEATURE_IDENTIFY BIT(5) struct gs_device_bt_const { - u32 feature; - u32 fclk_can; - u32 tseg1_min; - u32 tseg1_max; - u32 tseg2_min; - u32 tseg2_max; - u32 sjw_max; - u32 brp_min; - u32 brp_max; - u32 brp_inc; + __le32 feature; + __le32 fclk_can; + __le32 tseg1_min; + __le32 tseg1_max; + __le32 tseg2_min; + __le32 tseg2_max; + __le32 sjw_max; + __le32 brp_min; + __le32 brp_max; + __le32 brp_inc; } __packed; #define GS_CAN_FLAG_OVERFLOW 1 struct gs_host_frame { u32 echo_id; - u32 can_id; + __le32 can_id; u8 can_dlc; u8 channel; @@ -329,13 +335,13 @@ static void gs_usb_receive_bulk_callback(struct urb *urb) if (!skb) return; - cf->can_id = hf->can_id; + cf->can_id = le32_to_cpu(hf->can_id); cf->can_dlc = get_can_dlc(hf->can_dlc); memcpy(cf->data, hf->data, 8); /* ERROR frames tell us information about the controller */ - if (hf->can_id & CAN_ERR_FLAG) + if (le32_to_cpu(hf->can_id) & CAN_ERR_FLAG) gs_update_state(dev, cf); netdev->stats.rx_packets++; @@ -418,11 +424,11 @@ static int gs_usb_set_bittiming(struct net_device *netdev) if (!dbt) return -ENOMEM; - dbt->prop_seg = bt->prop_seg; - dbt->phase_seg1 = bt->phase_seg1; - dbt->phase_seg2 = bt->phase_seg2; - dbt->sjw = bt->sjw; - dbt->brp = bt->brp; + dbt->prop_seg = cpu_to_le32(bt->prop_seg); + dbt->phase_seg1 = cpu_to_le32(bt->phase_seg1); + dbt->phase_seg2 = cpu_to_le32(bt->phase_seg2); + dbt->sjw = cpu_to_le32(bt->sjw); + dbt->brp = cpu_to_le32(bt->brp); /* request bit timings */ rc = usb_control_msg(interface_to_usbdev(intf), @@ -503,7 +509,7 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, cf = (struct can_frame *)skb->data; - hf->can_id = cf->can_id; + hf->can_id = cpu_to_le32(cf->can_id); hf->can_dlc = cf->can_dlc; memcpy(hf->data, cf->data, cf->can_dlc); @@ -573,6 +579,7 @@ static int gs_can_open(struct net_device *netdev) int rc, i; struct gs_device_mode *dm; u32 ctrlmode; + u32 flags = 0; rc = open_candev(netdev); if (rc) @@ -640,24 +647,24 @@ static int gs_can_open(struct net_device *netdev) /* flags */ ctrlmode = dev->can.ctrlmode; - dm->flags = 0; if (ctrlmode & CAN_CTRLMODE_LOOPBACK) - dm->flags |= GS_CAN_MODE_LOOP_BACK; + flags |= GS_CAN_MODE_LOOP_BACK; else if (ctrlmode & CAN_CTRLMODE_LISTENONLY) - dm->flags |= GS_CAN_MODE_LISTEN_ONLY; + flags |= GS_CAN_MODE_LISTEN_ONLY; /* Controller is not allowed to retry TX * this mode is unavailable on atmels uc3c hardware */ if (ctrlmode & CAN_CTRLMODE_ONE_SHOT) - dm->flags |= GS_CAN_MODE_ONE_SHOT; + flags |= GS_CAN_MODE_ONE_SHOT; if (ctrlmode & CAN_CTRLMODE_3_SAMPLES) - dm->flags |= GS_CAN_MODE_TRIPLE_SAMPLE; + flags |= GS_CAN_MODE_TRIPLE_SAMPLE; /* finally start device */ - dm->mode = GS_CAN_MODE_START; + 
dm->mode = cpu_to_le32(GS_CAN_MODE_START); + dm->flags = cpu_to_le32(flags); rc = usb_control_msg(interface_to_usbdev(dev->iface), usb_sndctrlpipe(interface_to_usbdev(dev->iface), 0), GS_USB_BREQ_MODE, @@ -737,9 +744,9 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify) return -ENOMEM; if (do_identify) - imode->mode = GS_CAN_IDENTIFY_ON; + imode->mode = cpu_to_le32(GS_CAN_IDENTIFY_ON); else - imode->mode = GS_CAN_IDENTIFY_OFF; + imode->mode = cpu_to_le32(GS_CAN_IDENTIFY_OFF); rc = usb_control_msg(interface_to_usbdev(dev->iface), usb_sndctrlpipe(interface_to_usbdev(dev->iface), @@ -790,6 +797,7 @@ static struct gs_can *gs_make_candev(unsigned int channel, struct net_device *netdev; int rc; struct gs_device_bt_const *bt_const; + u32 feature; bt_const = kmalloc(sizeof(*bt_const), GFP_KERNEL); if (!bt_const) @@ -830,14 +838,14 @@ static struct gs_can *gs_make_candev(unsigned int channel, /* dev setup */ strcpy(dev->bt_const.name, "gs_usb"); - dev->bt_const.tseg1_min = bt_const->tseg1_min; - dev->bt_const.tseg1_max = bt_const->tseg1_max; - dev->bt_const.tseg2_min = bt_const->tseg2_min; - dev->bt_const.tseg2_max = bt_const->tseg2_max; - dev->bt_const.sjw_max = bt_const->sjw_max; - dev->bt_const.brp_min = bt_const->brp_min; - dev->bt_const.brp_max = bt_const->brp_max; - dev->bt_const.brp_inc = bt_const->brp_inc; + dev->bt_const.tseg1_min = le32_to_cpu(bt_const->tseg1_min); + dev->bt_const.tseg1_max = le32_to_cpu(bt_const->tseg1_max); + dev->bt_const.tseg2_min = le32_to_cpu(bt_const->tseg2_min); + dev->bt_const.tseg2_max = le32_to_cpu(bt_const->tseg2_max); + dev->bt_const.sjw_max = le32_to_cpu(bt_const->sjw_max); + dev->bt_const.brp_min = le32_to_cpu(bt_const->brp_min); + dev->bt_const.brp_max = le32_to_cpu(bt_const->brp_max); + dev->bt_const.brp_inc = le32_to_cpu(bt_const->brp_inc); dev->udev = interface_to_usbdev(intf); dev->iface = intf; @@ -854,28 +862,29 @@ static struct gs_can *gs_make_candev(unsigned int channel, /* can setup */ dev->can.state = CAN_STATE_STOPPED; - dev->can.clock.freq = bt_const->fclk_can; + dev->can.clock.freq = le32_to_cpu(bt_const->fclk_can); dev->can.bittiming_const = &dev->bt_const; dev->can.do_set_bittiming = gs_usb_set_bittiming; dev->can.ctrlmode_supported = 0; - if (bt_const->feature & GS_CAN_FEATURE_LISTEN_ONLY) + feature = le32_to_cpu(bt_const->feature); + if (feature & GS_CAN_FEATURE_LISTEN_ONLY) dev->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY; - if (bt_const->feature & GS_CAN_FEATURE_LOOP_BACK) + if (feature & GS_CAN_FEATURE_LOOP_BACK) dev->can.ctrlmode_supported |= CAN_CTRLMODE_LOOPBACK; - if (bt_const->feature & GS_CAN_FEATURE_TRIPLE_SAMPLE) + if (feature & GS_CAN_FEATURE_TRIPLE_SAMPLE) dev->can.ctrlmode_supported |= CAN_CTRLMODE_3_SAMPLES; - if (bt_const->feature & GS_CAN_FEATURE_ONE_SHOT) + if (feature & GS_CAN_FEATURE_ONE_SHOT) dev->can.ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT; SET_NETDEV_DEV(netdev, &intf->dev); - if (dconf->sw_version > 1) - if (bt_const->feature & GS_CAN_FEATURE_IDENTIFY) + if (le32_to_cpu(dconf->sw_version) > 1) + if (feature & GS_CAN_FEATURE_IDENTIFY) netdev->ethtool_ops = &gs_usb_ethtool_ops; kfree(bt_const); @@ -910,7 +919,7 @@ static int gs_usb_probe(struct usb_interface *intf, if (!hconf) return -ENOMEM; - hconf->byte_order = 0x0000beef; + hconf->byte_order = cpu_to_le32(0x0000beef); /* send host config */ rc = usb_control_msg(interface_to_usbdev(intf), -- cgit v1.2.3 From 1a1c436bad340cea1cff815dd2cbb2c4f6af8d43 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 21 Nov 2020 20:14:19 
+0100 Subject: can: mcp251xfd: mcp251xfd_probe(): bail out if no IRQ was given This patch add a check to the mcp251xfd_probe() function to bail out and give the user a proper error message if no IRQ is specified. Otherwise the driver will probe just fine but ifup will fail with a meaningless "RTNETLINK answers: Invalid argument" error message. Link: https://lore.kernel.org/r/20201123113522.3820052-1-mkl@pengutronix.de Reported-by: Niels Petter Signed-off-by: Marc Kleine-Budde --- drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c index 9c215f7c5f81..8a39be076e14 100644 --- a/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c +++ b/drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c @@ -2738,6 +2738,10 @@ static int mcp251xfd_probe(struct spi_device *spi) u32 freq; int err; + if (!spi->irq) + return dev_err_probe(&spi->dev, -ENXIO, + "No IRQ specified (maybe node \"interrupts-extended\" in DT missing)!\n"); + rx_int = devm_gpiod_get_optional(&spi->dev, "microchip,rx-int", GPIOD_IN); if (PTR_ERR(rx_int) == -EPROBE_DEFER) -- cgit v1.2.3 From 865f5b671b48d0088ce981cff1e822d9f7da441f Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Fri, 27 Nov 2020 08:35:12 +0100 Subject: can: m_can: m_can_open(): remove IRQF_TRIGGER_FALLING from request_threaded_irq()'s flags The threaded IRQ handler is used for the tcan4x5x driver only. The IRQ pin of the tcan4x5x controller is active low, so better not use IRQF_TRIGGER_FALLING when requesting the IRQ. As this can result in missing interrupts. Further, if the device tree specified the interrupt as "IRQ_TYPE_LEVEL_LOW", unloading and reloading of the driver results in the following error during ifup: | irq: type mismatch, failed to map hwirq-31 for gpio@20a8000! | tcan4x5x spi1.1: m_can device registered (irq=0, version=32) | tcan4x5x spi1.1 can2: TCAN4X5X successfully initialized. | tcan4x5x spi1.1 can2: failed to request interrupt This patch fixes the problem by removing the IRQF_TRIGGER_FALLING from the request_threaded_irq(). Fixes: f524f829b75a ("can: m_can: Create a m_can platform framework") Cc: Dan Murphy Cc: Sriram Dash Cc: Pankaj Sharma Link: https://lore.kernel.org/r/20201127093548.509253-1-mkl@pengutronix.de Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index f3fc37e96b08..c62439bdee28 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1653,7 +1653,7 @@ static int m_can_open(struct net_device *dev) INIT_WORK(&cdev->tx_work, m_can_tx_work_queue); err = request_threaded_irq(dev->irq, NULL, m_can_isr, - IRQF_ONESHOT | IRQF_TRIGGER_FALLING, + IRQF_ONESHOT, dev->name, dev); } else { err = request_irq(dev->irq, m_can_isr, IRQF_SHARED, dev->name, -- cgit v1.2.3 From e3409e4192535fbcc86a84b7a65d9351f46039ec Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Tue, 24 Nov 2020 19:47:38 +0100 Subject: can: m_can: fix nominal bitiming tseg2 min for version >= 3.1 At lest the revision 3.3.0 of the bosch m_can IP core specifies that valid register values for "Nominal Time segment after sample point (NTSEG2)" are from 1 to 127. As the hardware uses a value of one more than the programmed value, mean tseg2_min is 2. This patch fixes the tseg2_min value accordingly. 
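Spelled out, the off-by-one reasoning looks like this (an illustrative helper, not driver code):

    /* The hardware uses NTSEG2 + 1, so a register range of 1..127 maps to
     * phase_seg2 values of 2..128, hence tseg2_min must be 2, not 1.
     */
    static u32 example_ntseg2_reg_from_tseg2(u32 tseg2)
    {
            return tseg2 - 1;       /* tseg2 in [2, 128] -> NTSEG2 in [1, 127] */
    }
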
Cc: Dan Murphy Cc: Mario Huettel Acked-by: Sriram Dash Link: https://lore.kernel.org/r/20201124190751.3972238-1-mkl@pengutronix.de Fixes: b03cfc5bb0e1 ("can: m_can: Enable M_CAN version dependent initialization") Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index c62439bdee28..d4030abad935 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1033,7 +1033,7 @@ static const struct can_bittiming_const m_can_bittiming_const_31X = { .name = KBUILD_MODNAME, .tseg1_min = 2, /* Time segment 1 = prop_seg + phase_seg1 */ .tseg1_max = 256, - .tseg2_min = 1, /* Time segment 2 = phase_seg2 */ + .tseg2_min = 2, /* Time segment 2 = phase_seg2 */ .tseg2_max = 128, .sjw_max = 128, .brp_min = 1, -- cgit v1.2.3 From 5c7d55bded77da6db7c5d249610e3a2eed730b3c Mon Sep 17 00:00:00 2001 From: Pankaj Sharma Date: Thu, 26 Nov 2020 10:21:42 +0530 Subject: can: m_can: m_can_dev_setup(): add support for bosch mcan version 3.3.0 Add support for mcan bit timing and control mode according to bosch mcan IP version 3.3.0. The mcan version read from the Core Release field of CREL register would be 33. Accordingly the properties are to be set for mcan v3.3.0 Signed-off-by: Pankaj Sharma Link: https://lore.kernel.org/r/1606366302-5520-1-git-send-email-pankj.sharma@samsung.com Signed-off-by: Marc Kleine-Budde --- drivers/net/can/m_can/m_can.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/can/m_can/m_can.c b/drivers/net/can/m_can/m_can.c index d4030abad935..61a93b192037 100644 --- a/drivers/net/can/m_can/m_can.c +++ b/drivers/net/can/m_can/m_can.c @@ -1385,6 +1385,8 @@ static int m_can_dev_setup(struct m_can_classdev *m_can_dev) &m_can_data_bittiming_const_31X; break; case 32: + case 33: + /* Support both MCAN version v3.2.x and v3.3.0 */ m_can_dev->can.bittiming_const = m_can_dev->bit_timing ? m_can_dev->bit_timing : &m_can_bittiming_const_31X; -- cgit v1.2.3 From d73ff9b7c4eacaba0fd956d14882bcae970f8307 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 26 Nov 2020 20:21:40 +0100 Subject: can: af_can: can_rx_unregister(): remove WARN() statement from list operation sanity check To detect potential bugs in CAN protocol implementations (double removal of receiver entries) a WARN() statement has been used if no matching list item was found for removal. The fault injection issued by syzkaller was able to create a situation where the closing of a socket runs simultaneously to the notifier call chain for removing the CAN network device in use. This case is very unlikely in real life but it doesn't break anything. Therefore we just replace the WARN() statement with pr_warn() to preserve the notification for the CAN protocol development. 
Reported-by: syzbot+381d06e0c8eaacb8706f@syzkaller.appspotmail.com Reported-by: syzbot+d0ddd88c9a7432f041e6@syzkaller.appspotmail.com Reported-by: syzbot+76d62d3b8162883c7d11@syzkaller.appspotmail.com Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20201126192140.14350-1-socketcan@hartkopp.net Signed-off-by: Marc Kleine-Budde --- net/can/af_can.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/can/af_can.c b/net/can/af_can.c index 5d124c155904..4c343b43067f 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -541,10 +541,13 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, /* Check for bugs in CAN protocol implementations using af_can.c: * 'rcv' will be NULL if no matching list item was found for removal. + * As this case may potentially happen when closing a socket while + * the notifier for removing the CAN netdev is running we just print + * a warning here. */ if (!rcv) { - WARN(1, "BUG: receive list entry not found for dev %s, id %03X, mask %03X\n", - DNAME(dev), can_id, mask); + pr_warn("can: receive list entry not found for dev %s, id %03X, mask %03X\n", + DNAME(dev), can_id, mask); goto out; } -- cgit v1.2.3 From 69929d4c49e182f8526d42c43b37b460d562d3a0 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Tue, 24 Nov 2020 07:34:44 -0500 Subject: net: openvswitch: fix TTL decrement action netlink message format Currently, the openvswitch module is not accepting the correctly formated netlink message for the TTL decrement action. For both setting and getting the dec_ttl action, the actions should be nested in the OVS_DEC_TTL_ATTR_ACTION attribute as mentioned in the openvswitch.h uapi. When the original patch was sent, it was tested with a private OVS userspace implementation. This implementation was unfortunately not upstreamed and reviewed, hence an erroneous version of this patch was sent out. Leaving the patch as-is would cause problems as the kernel module could interpret additional attributes as actions and vice-versa, due to the actions not being encapsulated/nested within the actual attribute, but being concatinated after it. Fixes: 744676e77720 ("openvswitch: add TTL decrement action") Signed-off-by: Eelco Chaudron Link: https://lore.kernel.org/r/160622121495.27296.888010441924340582.stgit@wsfd-netdev64.ntdv.lab.eng.bos.redhat.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/openvswitch.h | 2 ++ net/openvswitch/actions.c | 7 ++-- net/openvswitch/flow_netlink.c | 74 +++++++++++++++++++++++++++++----------- 3 files changed, 60 insertions(+), 23 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 8300cc29dec8..8d16744edc31 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -1058,4 +1058,6 @@ enum ovs_dec_ttl_attr { __OVS_DEC_TTL_ATTR_MAX }; +#define OVS_DEC_TTL_ATTR_MAX (__OVS_DEC_TTL_ATTR_MAX - 1) + #endif /* _LINUX_OPENVSWITCH_H */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index b87bfc82f44f..5829a020b81c 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -958,14 +958,13 @@ static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb, { /* The first action is always 'OVS_DEC_TTL_ATTR_ARG'. 
*/ struct nlattr *dec_ttl_arg = nla_data(attr); - int rem = nla_len(attr); if (nla_len(dec_ttl_arg)) { - struct nlattr *actions = nla_next(dec_ttl_arg, &rem); + struct nlattr *actions = nla_data(dec_ttl_arg); if (actions) - return clone_execute(dp, skb, key, 0, actions, rem, - last, false); + return clone_execute(dp, skb, key, 0, nla_data(actions), + nla_len(actions), last, false); } consume_skb(skb); return 0; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 9d3e50c4d29f..ec0689ddc635 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2503,28 +2503,42 @@ static int validate_and_copy_dec_ttl(struct net *net, __be16 eth_type, __be16 vlan_tci, u32 mpls_label_count, bool log) { - int start, err; - u32 nested = true; + const struct nlattr *attrs[OVS_DEC_TTL_ATTR_MAX + 1]; + int start, action_start, err, rem; + const struct nlattr *a, *actions; + + memset(attrs, 0, sizeof(attrs)); + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); - if (!nla_len(attr)) - return ovs_nla_add_action(sfa, OVS_ACTION_ATTR_DEC_TTL, - NULL, 0, log); + /* Ignore unknown attributes to be future proof. */ + if (type > OVS_DEC_TTL_ATTR_MAX) + continue; + + if (!type || attrs[type]) + return -EINVAL; + + attrs[type] = a; + } + + actions = attrs[OVS_DEC_TTL_ATTR_ACTION]; + if (rem || !actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN)) + return -EINVAL; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log); if (start < 0) return start; - err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ACTION, &nested, - sizeof(nested), log); - - if (err) - return err; + action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log); + if (action_start < 0) + return start; - err = __ovs_nla_copy_actions(net, attr, key, sfa, eth_type, + err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, vlan_tci, mpls_label_count, log); if (err) return err; + add_nested_action_end(*sfa, action_start); add_nested_action_end(*sfa, start); return 0; } @@ -3487,20 +3501,42 @@ out: static int dec_ttl_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { - int err = 0, rem = nla_len(attr); - struct nlattr *start; + struct nlattr *start, *action_start; + const struct nlattr *a; + int err = 0, rem; start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL); - if (!start) return -EMSGSIZE; - err = ovs_nla_put_actions(nla_data(attr), rem, skb); - if (err) - nla_nest_cancel(skb, start); - else - nla_nest_end(skb, start); + nla_for_each_attr(a, nla_data(attr), nla_len(attr), rem) { + switch (nla_type(a)) { + case OVS_DEC_TTL_ATTR_ACTION: + + action_start = nla_nest_start_noflag(skb, OVS_DEC_TTL_ATTR_ACTION); + if (!action_start) { + err = -EMSGSIZE; + goto out; + } + + err = ovs_nla_put_actions(nla_data(a), nla_len(a), skb); + if (err) + goto out; + + nla_nest_end(skb, action_start); + break; + default: + /* Ignore all other option to be future compatible */ + break; + } + } + + nla_nest_end(skb, start); + return 0; + +out: + nla_nest_cancel(skb, start); return err; } -- cgit v1.2.3 From d3ab78858f1451351221061a1c365495df196500 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 26 Nov 2020 15:17:53 +0100 Subject: mptcp: fix NULL ptr dereference on bad MPJ If an msk listener receives an MPJ carrying an invalid token, it will zero the request socket msk entry. That should later cause fallback and subflow reset - as per RFC - at subflow_syn_recv_sock() time due to failing hmac validation. 
Since commit 4cf8b7e48a09 ("subflow: introduce and use mptcp_can_accept_new_subflow()"), we unconditionally dereference - in mptcp_can_accept_new_subflow - the subflow request msk before performing hmac validation. In the above scenario we hit a NULL ptr dereference. Address the issue doing the hmac validation earlier. Fixes: 4cf8b7e48a09 ("subflow: introduce and use mptcp_can_accept_new_subflow()") Tested-by: Davide Caratti Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts Link: https://lore.kernel.org/r/03b2cfa3ac80d8fc18272edc6442a9ddf0b1e34e.1606400227.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index ac4a1fe3550b..953906e40742 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -543,9 +543,8 @@ create_msk: fallback = true; } else if (subflow_req->mp_join) { mptcp_get_options(skb, &mp_opt); - if (!mp_opt.mp_join || - !mptcp_can_accept_new_subflow(subflow_req->msk) || - !subflow_hmac_valid(req, &mp_opt)) { + if (!mp_opt.mp_join || !subflow_hmac_valid(req, &mp_opt) || + !mptcp_can_accept_new_subflow(subflow_req->msk)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); fallback = true; } -- cgit v1.2.3 From 985f7337421a811cb354ca93882f943c8335a6f5 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Thu, 26 Nov 2020 10:12:20 -0500 Subject: sock: set sk_err to ee_errno on dequeue from errq When setting sk_err, set it to ee_errno, not ee_origin. Commit f5f99309fa74 ("sock: do not set sk_err in sock_dequeue_err_skb") disabled updating sk_err on errq dequeue, which is correct for most error types (origins): - sk->sk_err = err; Commit 38b257938ac6 ("sock: reset sk_err when the error queue is empty") reenabled the behavior for IMCP origins, which do require it: + if (icmp_next) + sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin; But read from ee_errno. Fixes: 38b257938ac6 ("sock: reset sk_err when the error queue is empty") Reported-by: Ayush Ranjan Signed-off-by: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Link: https://lore.kernel.org/r/20201126151220.2819322-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1ba8f0163744..06c526e0d810 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4549,7 +4549,7 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) if (skb && (skb_next = skb_peek(q))) { icmp_next = is_icmp_err_skb(skb_next); if (icmp_next) - sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_origin; + sk->sk_err = SKB_EXT_ERR(skb_next)->ee.ee_errno; } spin_unlock_irqrestore(&q->lock, flags); -- cgit v1.2.3
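To make the distinction between the two fields concrete, a minimal illustration (the field names come from the UAPI struct sock_extended_err; the helper itself is made up):

    #include <linux/errqueue.h>
    #include <linux/printk.h>

    /* ee_errno carries the error code to surface on the socket (an errno
     * value such as EHOSTUNREACH); ee_origin only records where the error
     * came from (e.g. SO_EE_ORIGIN_ICMP), so it is the wrong value for sk_err.
     */
    static void example_show_err(const struct sock_extended_err *ee)
    {
            pr_info("origin=%u errno=%u\n", ee->ee_origin, ee->ee_errno);
    }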