From 822eaf7cfb7c4783375bceadbc7651137346ac00 Mon Sep 17 00:00:00 2001
From: Yan Markman <ymarkman@marvell.com>
Date: Mon, 23 Oct 2017 15:24:29 +0200
Subject: net: mvpp2: fix TSO headers allocation and management

TSO headers are managed with txq index and therefore should be aligned
with the txq size, not with the aggregated txq size.

Fixes: 186cd4d4e414 ("net: mvpp2: software tso support")
Reported-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Yan Markman <ymarkman@marvell.com>
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 9c86cb7cb988..72e43d848034 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -5609,7 +5609,7 @@ static int mvpp2_txq_init(struct mvpp2_port *port,
 
 		txq_pcpu->tso_headers =
 			dma_alloc_coherent(port->dev->dev.parent,
-					   MVPP2_AGGR_TXQ_SIZE * TSO_HEADER_SIZE,
+					   txq_pcpu->size * TSO_HEADER_SIZE,
 					   &txq_pcpu->tso_headers_dma,
 					   GFP_KERNEL);
 		if (!txq_pcpu->tso_headers)
@@ -5623,7 +5623,7 @@ cleanup:
 		kfree(txq_pcpu->buffs);
 
 		dma_free_coherent(port->dev->dev.parent,
-				  MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
+				  txq_pcpu->size * TSO_HEADER_SIZE,
 				  txq_pcpu->tso_headers,
 				  txq_pcpu->tso_headers_dma);
 	}
@@ -5647,7 +5647,7 @@ static void mvpp2_txq_deinit(struct mvpp2_port *port,
 		kfree(txq_pcpu->buffs);
 
 		dma_free_coherent(port->dev->dev.parent,
-				  MVPP2_AGGR_TXQ_SIZE * MVPP2_DESC_ALIGNED_SIZE,
+				  txq_pcpu->size * TSO_HEADER_SIZE,
 				  txq_pcpu->tso_headers,
 				  txq_pcpu->tso_headers_dma);
 	}
-- 
cgit v1.2.3


From 20920267885218fda08dc12c7d3814938ab15b54 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@free-electrons.com>
Date: Mon, 23 Oct 2017 15:24:30 +0200
Subject: net: mvpp2: do not unmap TSO headers buffers

The TSO header buffers are coming from a per cpu pool and should not
be unmapped as they are reused. The PPv2 driver was unmapping all
descriptors buffers unconditionally. This patch fixes this by checking
the buffers dma addresses before unmapping them, and by not unmapping
those who are located in the TSO header pool.

Fixes: 186cd4d4e414 ("net: mvpp2: software tso support")
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 72e43d848034..3de05a8c468a 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -1167,6 +1167,11 @@ struct mvpp2_bm_pool {
 	u32 port_map;
 };
 
+#define IS_TSO_HEADER(txq_pcpu, addr) \
+	((addr) >= (txq_pcpu)->tso_headers_dma && \
+	 (addr) < (txq_pcpu)->tso_headers_dma + \
+	 (txq_pcpu)->size * TSO_HEADER_SIZE)
+
 /* Queue modes */
 #define MVPP2_QDIST_SINGLE_MODE	0
 #define MVPP2_QDIST_MULTI_MODE	1
@@ -5321,8 +5326,9 @@ static void mvpp2_txq_bufs_free(struct mvpp2_port *port,
 		struct mvpp2_txq_pcpu_buf *tx_buf =
 			txq_pcpu->buffs + txq_pcpu->txq_get_index;
 
-		dma_unmap_single(port->dev->dev.parent, tx_buf->dma,
-				 tx_buf->size, DMA_TO_DEVICE);
+		if (!IS_TSO_HEADER(txq_pcpu, tx_buf->dma))
+			dma_unmap_single(port->dev->dev.parent, tx_buf->dma,
+					 tx_buf->size, DMA_TO_DEVICE);
 		if (tx_buf->skb)
 			dev_kfree_skb_any(tx_buf->skb);
 
@@ -6212,12 +6218,15 @@ static inline void
 tx_desc_unmap_put(struct mvpp2_port *port, struct mvpp2_tx_queue *txq,
 		  struct mvpp2_tx_desc *desc)
 {
+	struct mvpp2_txq_pcpu *txq_pcpu = this_cpu_ptr(txq->pcpu);
+
 	dma_addr_t buf_dma_addr =
 		mvpp2_txdesc_dma_addr_get(port, desc);
 	size_t buf_sz =
 		mvpp2_txdesc_size_get(port, desc);
-	dma_unmap_single(port->dev->dev.parent, buf_dma_addr,
-			 buf_sz, DMA_TO_DEVICE);
+	if (!IS_TSO_HEADER(txq_pcpu, buf_dma_addr))
+		dma_unmap_single(port->dev->dev.parent, buf_dma_addr,
+				 buf_sz, DMA_TO_DEVICE);
 	mvpp2_txq_desc_put(txq);
 }
 
-- 
cgit v1.2.3


From 082297e61480c4d72ed75b31077e74aca0e7c799 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@free-electrons.com>
Date: Mon, 23 Oct 2017 15:24:31 +0200
Subject: net: mvpp2: do not call txq_done from the Tx path when Tx irqs are
 used

When Tx IRQs are used, txq_bufs_free() can be called from both the Tx
path and from NAPI poll(). This led to CPU stalls as if these two tasks
(Tx and Poll) are scheduled on two CPUs at the same time, DMA unmapping
operations are done on the same txq buffers.

This patch adds a check not to call txq_done() from the Tx path if Tx
interrupts are used as it does not make sense to do so.

Fixes: edc660fa09e2 ("net: mvpp2: replace TX coalescing interrupts with hrtimer")
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 3de05a8c468a..28c6e8a5e118 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -6499,7 +6499,7 @@ out:
 	}
 
 	/* Finalize TX processing */
-	if (txq_pcpu->count >= txq->done_pkts_coal)
+	if (!port->has_tx_irqs && txq_pcpu->count >= txq->done_pkts_coal)
 		mvpp2_txq_done(port, txq, txq_pcpu);
 
 	/* Set the timer in case not all frags were processed */
-- 
cgit v1.2.3


From d309ae5c6a00648198d1932e6db483d612c2e260 Mon Sep 17 00:00:00 2001
From: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Date: Wed, 25 Oct 2017 11:47:05 -0700
Subject: nfp: refuse offloading filters that redirects to upper devices

Previously we did not ensure that a netdev is a representative netdev
before dereferencing its private data. This can occur when an upper netdev
is created on a representative netdev. This patch corrects this by first
ensuring that the netdev is a representative netdev before using it.
Checking only switchdev_port_same_parent_id is not sufficient to ensure
that we can safely use the netdev. Failing to check that the netdev is also
a representative netdev would result in incorrect dereferencing.

Fixes: 1a1e586f54bf ("nfp: add basic action capabilities to flower offloads")
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Pieter Jansen van Vuuren <pieter.jansenvanvuuren@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/action.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index db9750695dc7..8ea9320014ee 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -110,6 +110,8 @@ nfp_fl_output(struct nfp_fl_output *output, const struct tc_action *action,
 	 */
 	if (!switchdev_port_same_parent_id(in_dev, out_dev))
 		return -EOPNOTSUPP;
+	if (!nfp_netdev_is_nfp_repr(out_dev))
+		return -EOPNOTSUPP;
 
 	output->port = cpu_to_be32(nfp_repr_get_port_id(out_dev));
 	if (!output->port)
-- 
cgit v1.2.3


From 6377ed0bbae6fa28853e1679d068a9106c8a8908 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Thu, 19 Oct 2017 14:14:29 +0300
Subject: net/mlx5: Fix health work queue spin lock to IRQ safe

spin_lock/unlock of health->wq_lock should be IRQ safe.
It was changed to spin_lock_irqsave since adding commit 0179720d6be2
("net/mlx5: Introduce trigger_health_work function") which uses
spin_lock from asynchronous event (IRQ) context.
Thus, all spin_lock/unlock of health->wq_lock should have been moved
to IRQ safe mode.
However, one occurrence on new code using this lock missed that
change, resulting in possible deadlock:
  kernel: Possible unsafe locking scenario:
  kernel:       CPU0
  kernel:       ----
  kernel:  lock(&(&health->wq_lock)->rlock);
  kernel:  <Interrupt>
  kernel:    lock(&(&health->wq_lock)->rlock);
  kernel: #012 *** DEADLOCK ***

Fixes: 2a0165a034ac ("net/mlx5: Cancel delayed recovery work when unloading the driver")
Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/health.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index 8aea0a065e56..db86e1506c8b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -356,10 +356,11 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
 {
 	struct mlx5_core_health *health = &dev->priv.health;
+	unsigned long flags;
 
-	spin_lock(&health->wq_lock);
+	spin_lock_irqsave(&health->wq_lock, flags);
 	set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
-	spin_unlock(&health->wq_lock);
+	spin_unlock_irqrestore(&health->wq_lock, flags);
 	cancel_delayed_work_sync(&dev->priv.health.recover_work);
 }
 
-- 
cgit v1.2.3


From 4ca637a20a524cd8ddbca696f12bfa92111c96e3 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Wed, 4 Oct 2017 17:58:21 -0500
Subject: net/mlx5: Delay events till mlx5 interface's add complete for pci
 resume

mlx5_ib_add is called during mlx5_pci_resume after a pci error.
Before mlx5_ib_add completes, there are multiple events which trigger
function mlx5_ib_event. This cause kernel panic because mlx5_ib_event
accesses unitialized resources.

The fix is to extend Erez Shitrit's patch <97834eba7c19>
("net/mlx5: Delay events till ib registration ends") to cover
the pci resume code path.

Trace:
mlx5_core 0001:01:00.6: mlx5_pci_resume was called
mlx5_core 0001:01:00.6: firmware version: 16.20.1011
mlx5_core 0001:01:00.6: mlx5_attach_interface:164:(pid 779):
mlx5_ib_event:2996:(pid 34777): warning: event on port 1
mlx5_ib_event:2996:(pid 34782): warning: event on port 1
Unable to handle kernel paging request for data at address 0x0001c104
Faulting instruction address: 0xd000000008f411fc
Oops: Kernel access of bad area, sig: 11 [#1]
...
...
Call Trace:
[c000000fff77bb70] [d000000008f4119c] mlx5_ib_event+0x64/0x470 [mlx5_ib] (unreliable)
[c000000fff77bc60] [d000000008e67130] mlx5_core_event+0xb8/0x210 [mlx5_core]
[c000000fff77bd10] [d000000008e4bd00] mlx5_eq_int+0x528/0x860[mlx5_core]

Fixes: 97834eba7c19 ("net/mlx5: Delay events till ib registration ends")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c | 70 ++++++++++++++++-----------
 1 file changed, 41 insertions(+), 29 deletions(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index ff60cf7342ca..fc281712869b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -77,35 +77,41 @@ static void add_delayed_event(struct mlx5_priv *priv,
 	list_add_tail(&delayed_event->list, &priv->waiting_events_list);
 }
 
-static void fire_delayed_event_locked(struct mlx5_device_context *dev_ctx,
-				      struct mlx5_core_dev *dev,
-				      struct mlx5_priv *priv)
+static void delayed_event_release(struct mlx5_device_context *dev_ctx,
+				  struct mlx5_priv *priv)
 {
+	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv);
 	struct mlx5_delayed_event *de;
 	struct mlx5_delayed_event *n;
+	struct list_head temp;
 
-	/* stop delaying events */
-	priv->is_accum_events = false;
+	INIT_LIST_HEAD(&temp);
+
+	spin_lock_irq(&priv->ctx_lock);
 
-	/* fire all accumulated events before new event comes */
-	list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) {
+	priv->is_accum_events = false;
+	list_splice_init(&priv->waiting_events_list, &temp);
+	if (!dev_ctx->context)
+		goto out;
+	list_for_each_entry_safe(de, n, &priv->waiting_events_list, list)
 		dev_ctx->intf->event(dev, dev_ctx->context, de->event, de->param);
+
+out:
+	spin_unlock_irq(&priv->ctx_lock);
+
+	list_for_each_entry_safe(de, n, &temp, list) {
 		list_del(&de->list);
 		kfree(de);
 	}
 }
 
-static void cleanup_delayed_evets(struct mlx5_priv *priv)
+/* accumulating events that can come after mlx5_ib calls to
+ * ib_register_device, till adding that interface to the events list.
+ */
+static void delayed_event_start(struct mlx5_priv *priv)
 {
-	struct mlx5_delayed_event *de;
-	struct mlx5_delayed_event *n;
-
 	spin_lock_irq(&priv->ctx_lock);
-	priv->is_accum_events = false;
-	list_for_each_entry_safe(de, n, &priv->waiting_events_list, list) {
-		list_del(&de->list);
-		kfree(de);
-	}
+	priv->is_accum_events = true;
 	spin_unlock_irq(&priv->ctx_lock);
 }
 
@@ -122,11 +128,8 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 		return;
 
 	dev_ctx->intf = intf;
-	/* accumulating events that can come after mlx5_ib calls to
-	 * ib_register_device, till adding that interface to the events list.
-	 */
 
-	priv->is_accum_events = true;
+	delayed_event_start(priv);
 
 	dev_ctx->context = intf->add(dev);
 	set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
@@ -137,8 +140,6 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 		spin_lock_irq(&priv->ctx_lock);
 		list_add_tail(&dev_ctx->list, &priv->ctx_list);
 
-		fire_delayed_event_locked(dev_ctx, dev, priv);
-
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 		if (dev_ctx->intf->pfault) {
 			if (priv->pfault) {
@@ -150,11 +151,12 @@ void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv)
 		}
 #endif
 		spin_unlock_irq(&priv->ctx_lock);
-	} else {
-		kfree(dev_ctx);
-		 /* delete all accumulated events */
-		cleanup_delayed_evets(priv);
 	}
+
+	delayed_event_release(dev_ctx, priv);
+
+	if (!dev_ctx->context)
+		kfree(dev_ctx);
 }
 
 static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf,
@@ -205,17 +207,21 @@ static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv
 	if (!dev_ctx)
 		return;
 
+	delayed_event_start(priv);
 	if (intf->attach) {
 		if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state))
-			return;
+			goto out;
 		intf->attach(dev, dev_ctx->context);
 		set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state);
 	} else {
 		if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state))
-			return;
+			goto out;
 		dev_ctx->context = intf->add(dev);
 		set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state);
 	}
+
+out:
+	delayed_event_release(dev_ctx, priv);
 }
 
 void mlx5_attach_device(struct mlx5_core_dev *dev)
@@ -414,8 +420,14 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
 	if (priv->is_accum_events)
 		add_delayed_event(priv, dev, event, param);
 
+	/* After mlx5_detach_device, the dev_ctx->intf is still set and dev_ctx is
+	 * still in priv->ctx_list. In this case, only notify the dev_ctx if its
+	 * ADDED or ATTACHED bit are set.
+	 */
 	list_for_each_entry(dev_ctx, &priv->ctx_list, list)
-		if (dev_ctx->intf->event)
+		if (dev_ctx->intf->event &&
+		    (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state) ||
+		     test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)))
 			dev_ctx->intf->event(dev, dev_ctx->context, event, param);
 
 	spin_unlock_irqrestore(&priv->ctx_lock, flags);
-- 
cgit v1.2.3


From 3c37745ec614ff048d5dce38f976804b05d307ee Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 17 Oct 2017 12:33:43 +0200
Subject: net/mlx5e: Properly deal with encap flows add/del under neigh update

Currently, the encap action offload is handled in the actions parse
function and not in mlx5e_tc_add_fdb_flow() where we deal with all
the other aspects of offloading actions (vlan, modify header) and
the rule itself.

When the neigh update code (mlx5e_tc_encap_flows_add()) recreates the
encap entry and offloads the related flows, we wrongly call again into
mlx5e_tc_add_fdb_flow(), this for itself would cause us to handle
again the offloading of vlans and header re-write which puts things
in non consistent state and step on freed memory (e.g the modify
header parse buffer which is already freed).

Since on error, mlx5e_tc_add_fdb_flow() detaches and may release the
encap entry, it causes a corruption at the neigh update code which goes
over the list of flows associated with this encap entry, or double free
when the tc flow is later deleted by user-space.

When neigh update (mlx5e_tc_encap_flows_del()) unoffloads the flows related
to an encap entry which is now invalid, we do a partial repeat of the eswitch
flow removal code which is wrong too.

To fix things up we do the following:

(1) handle the encap action offload in the eswitch flow add function
    mlx5e_tc_add_fdb_flow() as done for the other actions and the rule itself.

(2) modify the neigh update code (mlx5e_tc_encap_flows_add/del) to only
    deal with the encap entry and rules delete/add and not with any of
    the other offloaded actions.

Fixes: 232c001398ae ('net/mlx5e: Add support to neighbour update flow')
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 89 +++++++++++++++----------
 1 file changed, 54 insertions(+), 35 deletions(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 1aa2028ed995..9ba1f72060aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -78,9 +78,11 @@ struct mlx5e_tc_flow {
 };
 
 struct mlx5e_tc_flow_parse_attr {
+	struct ip_tunnel_info tun_info;
 	struct mlx5_flow_spec spec;
 	int num_mod_hdr_actions;
 	void *mod_hdr_actions;
+	int mirred_ifindex;
 };
 
 enum {
@@ -322,6 +324,12 @@ static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 			       struct mlx5e_tc_flow *flow);
 
+static int mlx5e_attach_encap(struct mlx5e_priv *priv,
+			      struct ip_tunnel_info *tun_info,
+			      struct net_device *mirred_dev,
+			      struct net_device **encap_dev,
+			      struct mlx5e_tc_flow *flow);
+
 static struct mlx5_flow_handle *
 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		      struct mlx5e_tc_flow_parse_attr *parse_attr,
@@ -329,9 +337,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
-	struct mlx5_flow_handle *rule;
+	struct net_device *out_dev, *encap_dev = NULL;
+	struct mlx5_flow_handle *rule = NULL;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5e_priv *out_priv;
 	int err;
 
+	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
+		out_dev = __dev_get_by_index(dev_net(priv->netdev),
+					     attr->parse_attr->mirred_ifindex);
+		err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
+					 out_dev, &encap_dev, flow);
+		if (err) {
+			rule = ERR_PTR(err);
+			if (err != -EAGAIN)
+				goto err_attach_encap;
+		}
+		out_priv = netdev_priv(encap_dev);
+		rpriv = out_priv->ppriv;
+		attr->out_rep = rpriv->rep;
+	}
+
 	err = mlx5_eswitch_add_vlan_action(esw, attr);
 	if (err) {
 		rule = ERR_PTR(err);
@@ -347,10 +373,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		}
 	}
 
-	rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
-	if (IS_ERR(rule))
-		goto err_add_rule;
-
+	/* we get here if (1) there's no error (rule being null) or when
+	 * (2) there's an encap action and we're on -EAGAIN (no valid neigh)
+	 */
+	if (rule != ERR_PTR(-EAGAIN)) {
+		rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
+		if (IS_ERR(rule))
+			goto err_add_rule;
+	}
 	return rule;
 
 err_add_rule:
@@ -361,6 +391,7 @@ err_mod_hdr:
 err_add_vlan:
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
 		mlx5e_detach_encap(priv, flow);
+err_attach_encap:
 	return rule;
 }
 
@@ -389,6 +420,8 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
 			      struct mlx5e_encap_entry *e)
 {
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_esw_flow_attr *esw_attr;
 	struct mlx5e_tc_flow *flow;
 	int err;
 
@@ -404,10 +437,9 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
 	mlx5e_rep_queue_neigh_stats_work(priv);
 
 	list_for_each_entry(flow, &e->flows, encap) {
-		flow->esw_attr->encap_id = e->encap_id;
-		flow->rule = mlx5e_tc_add_fdb_flow(priv,
-						   flow->esw_attr->parse_attr,
-						   flow);
+		esw_attr = flow->esw_attr;
+		esw_attr->encap_id = e->encap_id;
+		flow->rule = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
 		if (IS_ERR(flow->rule)) {
 			err = PTR_ERR(flow->rule);
 			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
@@ -421,15 +453,13 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
 			      struct mlx5e_encap_entry *e)
 {
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5e_tc_flow *flow;
-	struct mlx5_fc *counter;
 
 	list_for_each_entry(flow, &e->flows, encap) {
 		if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
 			flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
-			counter = mlx5_flow_rule_counter(flow->rule);
-			mlx5_del_flow_rules(flow->rule);
-			mlx5_fc_destroy(priv->mdev, counter);
+			mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr);
 		}
 	}
 
@@ -1942,7 +1972,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 
 		if (is_tcf_mirred_egress_redirect(a)) {
 			int ifindex = tcf_mirred_ifindex(a);
-			struct net_device *out_dev, *encap_dev = NULL;
+			struct net_device *out_dev;
 			struct mlx5e_priv *out_priv;
 
 			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
@@ -1955,17 +1985,13 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 				rpriv = out_priv->ppriv;
 				attr->out_rep = rpriv->rep;
 			} else if (encap) {
-				err = mlx5e_attach_encap(priv, info,
-							 out_dev, &encap_dev, flow);
-				if (err && err != -EAGAIN)
-					return err;
+				parse_attr->mirred_ifindex = ifindex;
+				parse_attr->tun_info = *info;
+				attr->parse_attr = parse_attr;
 				attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
 					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
 					MLX5_FLOW_CONTEXT_ACTION_COUNT;
-				out_priv = netdev_priv(encap_dev);
-				rpriv = out_priv->ppriv;
-				attr->out_rep = rpriv->rep;
-				attr->parse_attr = parse_attr;
+				/* attr->out_rep is resolved when we handle encap */
 			} else {
 				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
 				       priv->netdev->name, out_dev->name);
@@ -2047,7 +2073,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
 	if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
 		err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
 		if (err < 0)
-			goto err_handle_encap_flow;
+			goto err_free;
 		flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
 	} else {
 		err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
@@ -2058,10 +2084,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
 
 	if (IS_ERR(flow->rule)) {
 		err = PTR_ERR(flow->rule);
-		goto err_free;
+		if (err != -EAGAIN)
+			goto err_free;
 	}
 
-	flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+	if (err != -EAGAIN)
+		flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
+
 	err = rhashtable_insert_fast(&tc->ht, &flow->node,
 				     tc->ht_params);
 	if (err)
@@ -2075,16 +2104,6 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv,
 err_del_rule:
 	mlx5e_tc_del_flow(priv, flow);
 
-err_handle_encap_flow:
-	if (err == -EAGAIN) {
-		err = rhashtable_insert_fast(&tc->ht, &flow->node,
-					     tc->ht_params);
-		if (err)
-			mlx5e_tc_del_flow(priv, flow);
-		else
-			return 0;
-	}
-
 err_free:
 	kvfree(parse_attr);
 	kfree(flow);
-- 
cgit v1.2.3


From be0f161ef141e4df368aa3f417a1c2ab9c362e75 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Thu, 28 Sep 2017 15:33:50 -0500
Subject: net/mlx5e: DCBNL, Implement tc with ets type and zero bandwidth

Previously, tc with ets type and zero bandwidth is not accepted
by driver. This behavior does not follow the IEEE802.1qaz spec.

If there are tcs with ets type and zero bandwidth, these tcs are
assigned to the lowest priority tc_group #0. We equally distribute
100% bw of the tc_group #0 to these zero bandwidth ets tcs.
Also, the non zero bandwidth ets tcs are assigned to tc_group #1.

If there is no zero bandwidth ets tc, the non zero bandwidth ets tcs
are assigned to tc_group #0.

Fixes: cdcf11212b22 ("net/mlx5e: Validate BW weight values of ETS")
Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 113 +++++++++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/port.c     |  21 ++++
 include/linux/mlx5/port.h                          |   2 +
 3 files changed, 106 insertions(+), 30 deletions(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index c1d384fca4dc..51c4cc00a186 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -41,6 +41,11 @@
 #define MLX5E_CEE_STATE_UP    1
 #define MLX5E_CEE_STATE_DOWN  0
 
+enum {
+	MLX5E_VENDOR_TC_GROUP_NUM = 7,
+	MLX5E_LOWEST_PRIO_GROUP   = 0,
+};
+
 /* If dcbx mode is non-host set the dcbx mode to host.
  */
 static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv,
@@ -85,6 +90,9 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5_core_dev *mdev = priv->mdev;
+	u8 tc_group[IEEE_8021QAZ_MAX_TCS];
+	bool is_tc_group_6_exist = false;
+	bool is_zero_bw_ets_tc = false;
 	int err = 0;
 	int i;
 
@@ -96,37 +104,64 @@ static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
 		err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]);
 		if (err)
 			return err;
-	}
 
-	for (i = 0; i < ets->ets_cap; i++) {
+		err = mlx5_query_port_tc_group(mdev, i, &tc_group[i]);
+		if (err)
+			return err;
+
 		err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]);
 		if (err)
 			return err;
+
+		if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC &&
+		    tc_group[i] == (MLX5E_LOWEST_PRIO_GROUP + 1))
+			is_zero_bw_ets_tc = true;
+
+		if (tc_group[i] == (MLX5E_VENDOR_TC_GROUP_NUM - 1))
+			is_tc_group_6_exist = true;
+	}
+
+	/* Report 0% ets tc if exits*/
+	if (is_zero_bw_ets_tc) {
+		for (i = 0; i < ets->ets_cap; i++)
+			if (tc_group[i] == MLX5E_LOWEST_PRIO_GROUP)
+				ets->tc_tx_bw[i] = 0;
+	}
+
+	/* Update tc_tsa based on fw setting*/
+	for (i = 0; i < ets->ets_cap; i++) {
 		if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC)
 			priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+		else if (tc_group[i] == MLX5E_VENDOR_TC_GROUP_NUM &&
+			 !is_tc_group_6_exist)
+			priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
 	}
-
 	memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa));
 
 	return err;
 }
 
-enum {
-	MLX5E_VENDOR_TC_GROUP_NUM = 7,
-	MLX5E_ETS_TC_GROUP_NUM    = 0,
-};
-
 static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc)
 {
 	bool any_tc_mapped_to_ets = false;
+	bool ets_zero_bw = false;
 	int strict_group;
 	int i;
 
-	for (i = 0; i <= max_tc; i++)
-		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
+	for (i = 0; i <= max_tc; i++) {
+		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
 			any_tc_mapped_to_ets = true;
+			if (!ets->tc_tx_bw[i])
+				ets_zero_bw = true;
+		}
+	}
 
-	strict_group = any_tc_mapped_to_ets ? 1 : 0;
+	/* strict group has higher priority than ets group */
+	strict_group = MLX5E_LOWEST_PRIO_GROUP;
+	if (any_tc_mapped_to_ets)
+		strict_group++;
+	if (ets_zero_bw)
+		strict_group++;
 
 	for (i = 0; i <= max_tc; i++) {
 		switch (ets->tc_tsa[i]) {
@@ -137,7 +172,9 @@ static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc)
 			tc_group[i] = strict_group++;
 			break;
 		case IEEE_8021QAZ_TSA_ETS:
-			tc_group[i] = MLX5E_ETS_TC_GROUP_NUM;
+			tc_group[i] = MLX5E_LOWEST_PRIO_GROUP;
+			if (ets->tc_tx_bw[i] && ets_zero_bw)
+				tc_group[i] = MLX5E_LOWEST_PRIO_GROUP + 1;
 			break;
 		}
 	}
@@ -146,8 +183,22 @@ static void mlx5e_build_tc_group(struct ieee_ets *ets, u8 *tc_group, int max_tc)
 static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
 				 u8 *tc_group, int max_tc)
 {
+	int bw_for_ets_zero_bw_tc = 0;
+	int last_ets_zero_bw_tc = -1;
+	int num_ets_zero_bw = 0;
 	int i;
 
+	for (i = 0; i <= max_tc; i++) {
+		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS &&
+		    !ets->tc_tx_bw[i]) {
+			num_ets_zero_bw++;
+			last_ets_zero_bw_tc = i;
+		}
+	}
+
+	if (num_ets_zero_bw)
+		bw_for_ets_zero_bw_tc = MLX5E_MAX_BW_ALLOC / num_ets_zero_bw;
+
 	for (i = 0; i <= max_tc; i++) {
 		switch (ets->tc_tsa[i]) {
 		case IEEE_8021QAZ_TSA_VENDOR:
@@ -157,12 +208,26 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
 			tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
 			break;
 		case IEEE_8021QAZ_TSA_ETS:
-			tc_tx_bw[i] = ets->tc_tx_bw[i];
+			tc_tx_bw[i] = ets->tc_tx_bw[i] ?
+				      ets->tc_tx_bw[i] :
+				      bw_for_ets_zero_bw_tc;
 			break;
 		}
 	}
+
+	/* Make sure the total bw for ets zero bw group is 100% */
+	if (last_ets_zero_bw_tc != -1)
+		tc_tx_bw[last_ets_zero_bw_tc] +=
+			MLX5E_MAX_BW_ALLOC % num_ets_zero_bw;
 }
 
+/* If there are ETS BW 0,
+ *   Set ETS group # to 1 for all ETS non zero BW tcs. Their sum must be 100%.
+ *   Set group #0 to all the ETS BW 0 tcs and
+ *     equally splits the 100% BW between them
+ *   Report both group #0 and #1 as ETS type.
+ *     All the tcs in group #0 will be reported with 0% BW.
+ */
 int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -188,7 +253,6 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 		return err;
 
 	memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa));
-
 	return err;
 }
 
@@ -209,17 +273,9 @@ static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
 	}
 
 	/* Validate Bandwidth Sum */
-	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
-		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS) {
-			if (!ets->tc_tx_bw[i]) {
-				netdev_err(netdev,
-					   "Failed to validate ETS: BW 0 is illegal\n");
-				return -EINVAL;
-			}
-
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_ETS)
 			bw_sum += ets->tc_tx_bw[i];
-		}
-	}
 
 	if (bw_sum != 0 && bw_sum != 100) {
 		netdev_err(netdev,
@@ -533,8 +589,7 @@ static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev,
 static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev,
 				      int pgid, u8 *bw_pct)
 {
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-	struct mlx5_core_dev *mdev = priv->mdev;
+	struct ieee_ets ets;
 
 	if (pgid >= CEE_DCBX_MAX_PGS) {
 		netdev_err(netdev,
@@ -542,8 +597,8 @@ static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev,
 		return;
 	}
 
-	if (mlx5_query_port_tc_bw_alloc(mdev, pgid, bw_pct))
-		*bw_pct = 0;
+	mlx5e_dcbnl_ieee_getets(netdev, &ets);
+	*bw_pct = ets.tc_tx_bw[pgid];
 }
 
 static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev,
@@ -739,8 +794,6 @@ static void mlx5e_ets_init(struct mlx5e_priv *priv)
 		ets.prio_tc[i] = i;
 	}
 
-	memcpy(priv->dcbx.tc_tsa, ets.tc_tsa, sizeof(ets.tc_tsa));
-
 	/* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
 	ets.prio_tc[0] = 1;
 	ets.prio_tc[1] = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 1975d4388d4f..e07061f565d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -677,6 +677,27 @@ int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group)
 }
 EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group);
 
+int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev,
+			     u8 tc, u8 *tc_group)
+{
+	u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+	void *ets_tcn_conf;
+	int err;
+
+	err = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+	if (err)
+		return err;
+
+	ets_tcn_conf = MLX5_ADDR_OF(qetc_reg, out,
+				    tc_configuration[tc]);
+
+	*tc_group = MLX5_GET(ets_tcn_config_reg, ets_tcn_conf,
+			     group);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_tc_group);
+
 int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw)
 {
 	u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0};
diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h
index c57d4b7de3a8..c59af8ab753a 100644
--- a/include/linux/mlx5/port.h
+++ b/include/linux/mlx5/port.h
@@ -157,6 +157,8 @@ int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc);
 int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev,
 			    u8 prio, u8 *tc);
 int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group);
+int mlx5_query_port_tc_group(struct mlx5_core_dev *mdev,
+			     u8 tc, u8 *tc_group);
 int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw);
 int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
 				u8 tc, u8 *bw_pct);
-- 
cgit v1.2.3


From ef4816f0ee576d4a27ed35cd1090904121391cb9 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@free-electrons.com>
Date: Tue, 24 Oct 2017 11:41:26 +0200
Subject: net: mvpp2: fix typo in the tcam setup

This patch fixes a typo in the mvpp2_prs_tcam_data_cmp() function, as
the shift value is inverted with the data.

Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit")
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 28c6e8a5e118..0b2017170d5b 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -1539,7 +1539,7 @@ static bool mvpp2_prs_tcam_data_cmp(struct mvpp2_prs_entry *pe, int offs,
 	int off = MVPP2_PRS_TCAM_DATA_BYTE(offs);
 	u16 tcam_data;
 
-	tcam_data = (8 << pe->tcam.byte[off + 1]) | pe->tcam.byte[off];
+	tcam_data = (pe->tcam.byte[off + 1] << 8) | pe->tcam.byte[off];
 	if (tcam_data != data)
 		return false;
 	return true;
-- 
cgit v1.2.3


From 20746d717ea390ac6ac3aa531f27ac156bf2e747 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@free-electrons.com>
Date: Tue, 24 Oct 2017 11:41:27 +0200
Subject: net: mvpp2: fix invalid parameters order when calling the tcam init

When calling mvpp2_prs_mac_multi_set() from mvpp2_prs_mac_init(), two
parameters (the port index and the table index) are inverted. Fixes
this.

Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit")
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 0b2017170d5b..c8ce652a4dd7 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -2614,8 +2614,8 @@ static void mvpp2_prs_mac_init(struct mvpp2 *priv)
 	/* place holders only - no ports */
 	mvpp2_prs_mac_drop_all_set(priv, 0, false);
 	mvpp2_prs_mac_promisc_set(priv, 0, false);
-	mvpp2_prs_mac_multi_set(priv, MVPP2_PE_MAC_MC_ALL, 0, false);
-	mvpp2_prs_mac_multi_set(priv, MVPP2_PE_MAC_MC_IP6, 0, false);
+	mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_ALL, false);
+	mvpp2_prs_mac_multi_set(priv, 0, MVPP2_PE_MAC_MC_IP6, false);
 }
 
 /* Set default entries for various types of dsa packets */
-- 
cgit v1.2.3


From 239dd4ee4838523419ad16e05b16a2003b71d317 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@free-electrons.com>
Date: Tue, 24 Oct 2017 11:41:28 +0200
Subject: net: mvpp2: do not sleep in set_rx_mode

This patch replaces GFP_KERNEL by GFP_ATOMIC to avoid sleeping in the
ndo_set_rx_mode() call which is called with BH disabled.

Fixes: 3f518509dedc ("ethernet: Add new driver for Marvell Armada 375 network unit")
Signed-off-by: Antoine Tenart <antoine.tenart@free-electrons.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvpp2.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index c8ce652a4dd7..a37af5813f33 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -3396,7 +3396,7 @@ mvpp2_prs_mac_da_range_find(struct mvpp2 *priv, int pmap, const u8 *da,
 	struct mvpp2_prs_entry *pe;
 	int tid;
 
-	pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+	pe = kzalloc(sizeof(*pe), GFP_ATOMIC);
 	if (!pe)
 		return NULL;
 	mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
@@ -3458,7 +3458,7 @@ static int mvpp2_prs_mac_da_accept(struct mvpp2 *priv, int port,
 		if (tid < 0)
 			return tid;
 
-		pe = kzalloc(sizeof(*pe), GFP_KERNEL);
+		pe = kzalloc(sizeof(*pe), GFP_ATOMIC);
 		if (!pe)
 			return -ENOMEM;
 		mvpp2_prs_tcam_lu_set(pe, MVPP2_PRS_LU_MAC);
-- 
cgit v1.2.3


From 44c445c3d1b4eacff23141fa7977c3b2ec3a45c9 Mon Sep 17 00:00:00 2001
From: Vincenzo Maffione <v.maffione@gmail.com>
Date: Sat, 16 Sep 2017 18:00:00 +0200
Subject: e1000: fix race condition between e1000_down() and e1000_watchdog

This patch fixes a race condition that can result into the interface being
up and carrier on, but with transmits disabled in the hardware.
The bug may show up by repeatedly IFF_DOWN+IFF_UP the interface, which
allows e1000_watchdog() interleave with e1000_down().

    CPU x                           CPU y
    --------------------------------------------------------------------
    e1000_down():
        netif_carrier_off()
                                    e1000_watchdog():
                                        if (carrier == off) {
                                            netif_carrier_on();
                                            enable_hw_transmit();
                                        }
        disable_hw_transmit();
                                    e1000_watchdog():
                                        /* carrier on, do nothing */

Signed-off-by: Vincenzo Maffione <v.maffione@gmail.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000/e1000_main.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 98375e1e1185..1982f7917a8d 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -520,8 +520,6 @@ void e1000_down(struct e1000_adapter *adapter)
 	struct net_device *netdev = adapter->netdev;
 	u32 rctl, tctl;
 
-	netif_carrier_off(netdev);
-
 	/* disable receives in the hardware */
 	rctl = er32(RCTL);
 	ew32(RCTL, rctl & ~E1000_RCTL_EN);
@@ -537,6 +535,15 @@ void e1000_down(struct e1000_adapter *adapter)
 	E1000_WRITE_FLUSH();
 	msleep(10);
 
+	/* Set the carrier off after transmits have been disabled in the
+	 * hardware, to avoid race conditions with e1000_watchdog() (which
+	 * may be running concurrently to us, checking for the carrier
+	 * bit to decide whether it should enable transmits again). Such
+	 * a race condition would result into transmission being disabled
+	 * in the hardware until the next IFF_DOWN+IFF_UP cycle.
+	 */
+	netif_carrier_off(netdev);
+
 	napi_disable(&adapter->napi);
 
 	e1000_irq_disable(adapter);
-- 
cgit v1.2.3


From 5983587c8c5ef00d6886477544ad67d495bc5479 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 22 Sep 2017 18:13:48 +0100
Subject: e1000: avoid null pointer dereference on invalid stat type

Currently if the stat type is invalid then data[i] is being set
either by dereferencing a null pointer p, or it is reading from
an incorrect previous location if we had a valid stat type
previously.  Fix this by skipping over the read of p on an invalid
stat type.

Detected by CoverityScan, CID#113385 ("Explicit null dereferenced")

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000/e1000_ethtool.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
index ec8aa4562cc9..3b3983a1ffbb 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c
@@ -1824,11 +1824,12 @@ static void e1000_get_ethtool_stats(struct net_device *netdev,
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	int i;
-	char *p = NULL;
 	const struct e1000_stats *stat = e1000_gstrings_stats;
 
 	e1000_update_stats(adapter);
-	for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++) {
+	for (i = 0; i < E1000_GLOBAL_STATS_LEN; i++, stat++) {
+		char *p;
+
 		switch (stat->type) {
 		case NETDEV_STATS:
 			p = (char *)netdev + stat->stat_offset;
@@ -1839,15 +1840,13 @@ static void e1000_get_ethtool_stats(struct net_device *netdev,
 		default:
 			WARN_ONCE(1, "Invalid E1000 stat type: %u index %d\n",
 				  stat->type, i);
-			break;
+			continue;
 		}
 
 		if (stat->sizeof_stat == sizeof(u64))
 			data[i] = *(u64 *)p;
 		else
 			data[i] = *(u32 *)p;
-
-		stat++;
 	}
 /* BUG_ON(i != E1000_STATS_LEN); */
 }
-- 
cgit v1.2.3


From 104ba83363d1d42af62abb247f1426c09a80fced Mon Sep 17 00:00:00 2001
From: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Date: Thu, 19 Oct 2017 20:07:36 +0100
Subject: igb: Fix TX map failure path

When the driver cannot map a TX buffer, instead of rolling back
gracefully and retrying later, we currently get a panic:

[  159.885994] igb 0000:00:00.0: TX DMA map failed
[  159.886588] Unable to handle kernel paging request at virtual address ffff00000a08c7a8
               ...
[  159.897031] PC is at igb_xmit_frame_ring+0x9c8/0xcb8

Fix the erroneous test that leads to this situation.

Signed-off-by: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index fd4a46b03cc8..ea69af267d63 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -5326,7 +5326,7 @@ dma_error:
 				       DMA_TO_DEVICE);
 		dma_unmap_len_set(tx_buffer, len, 0);
 
-		if (i--)
+		if (i-- == 0)
 			i += tx_ring->count;
 		tx_buffer = &tx_ring->tx_buffer_info[i];
 	}
-- 
cgit v1.2.3


From 069db9cd0bbde92d3aa947ed86a09cbd4ceb5f67 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Thu, 19 Oct 2017 17:07:13 -0400
Subject: ixgbe: Fix Tx map failure path

This patch is a partial revert of "ixgbe: Don't bother clearing buffer
memory for descriptor rings". Specifically I messed up the exception
handling path a bit and this resulted in us incorrectly adding the count
back in when we didn't need to.

In order to make this simpler I am reverting most of the exception handling
path change and instead just replacing the bit that was handled by the
unmap_and_free call.

Fixes: ffed21bcee7a ("ixgbe: Don't bother clearing buffer memory for descriptor rings")
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 4d76afd13868..6d5f31e94358 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -8020,29 +8020,23 @@ static int ixgbe_tx_map(struct ixgbe_ring *tx_ring,
 	return 0;
 dma_error:
 	dev_err(tx_ring->dev, "TX DMA map failed\n");
-	tx_buffer = &tx_ring->tx_buffer_info[i];
 
 	/* clear dma mappings for failed tx_buffer_info map */
-	while (tx_buffer != first) {
+	for (;;) {
+		tx_buffer = &tx_ring->tx_buffer_info[i];
 		if (dma_unmap_len(tx_buffer, len))
 			dma_unmap_page(tx_ring->dev,
 				       dma_unmap_addr(tx_buffer, dma),
 				       dma_unmap_len(tx_buffer, len),
 				       DMA_TO_DEVICE);
 		dma_unmap_len_set(tx_buffer, len, 0);
-
-		if (i--)
+		if (tx_buffer == first)
+			break;
+		if (i == 0)
 			i += tx_ring->count;
-		tx_buffer = &tx_ring->tx_buffer_info[i];
+		i--;
 	}
 
-	if (dma_unmap_len(tx_buffer, len))
-		dma_unmap_single(tx_ring->dev,
-				 dma_unmap_addr(tx_buffer, dma),
-				 dma_unmap_len(tx_buffer, len),
-				 DMA_TO_DEVICE);
-	dma_unmap_len_set(tx_buffer, len, 0);
-
 	dev_kfree_skb_any(first->skb);
 	first->skb = NULL;
 
-- 
cgit v1.2.3


From 10781348cadebbd5291c8fb193e850365c914da8 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 20 Oct 2017 13:59:20 -0700
Subject: i40e: Fix incorrect use of tx_itr_setting when checking for Rx ITR
 setup

It looks like there was either a copy/paste error or just a typo that
resulted in the Tx ITR setting being used to determine if we were using
adaptive Rx interrupt moderation or not.

This patch fixes the typo.

Fixes: 65e87c0398f5 ("i40evf: support queue-specific settings for interrupt moderation")
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 2756131495f0..ab142e05e196 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2269,7 +2269,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi,
 		goto enable_int;
 	}
 
-	if (ITR_IS_DYNAMIC(tx_itr_setting)) {
+	if (ITR_IS_DYNAMIC(rx_itr_setting)) {
 		rx = i40e_set_new_dynamic_itr(&q_vector->rx);
 		rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr);
 	}
-- 
cgit v1.2.3


From 62b4c6694dfd3821bd5ea5bed48238bbabd5fe8b Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Sat, 21 Oct 2017 18:12:29 -0700
Subject: i40e: Add programming descriptors to cleaned_count

This patch updates the i40e driver to include programming descriptors in
the cleaned_count. Without this change it becomes possible for us to leak
memory as we don't trigger a large enough allocation when the time comes to
allocate new buffers and we end up overwriting a number of rx_buffers equal
to the number of programming descriptors we encountered.

Fixes: 0e626ff7ccbf ("i40e: Fix support for flow director programming status")
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Anders K. Pedersen <akp@cohaesio.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index ab142e05e196..120c68f78951 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2102,6 +2102,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 
 		if (unlikely(i40e_rx_is_programming_status(qword))) {
 			i40e_clean_programming_status(rx_ring, rx_desc, qword);
+			cleaned_count++;
 			continue;
 		}
 		size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
-- 
cgit v1.2.3


From 4894ac6b6c25b2693dc21c611621dc9fd21f4090 Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Thu, 26 Oct 2017 09:51:33 +0100
Subject: net: stmmac: dwc-qos-eth: Fix typo in DT bindings parsing

According to DT bindings documentation we are expecting a
property called "snps,read-requests" but we are parsing
instead a property called "read,read-requests".

This is clearly a typo. Fix it.

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
index 5efef8001edf..3256e5cbad27 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c
@@ -74,7 +74,7 @@ static int dwc_eth_dwmac_config_dt(struct platform_device *pdev,
 		plat_dat->axi->axi_wr_osr_lmt--;
 	}
 
-	if (of_property_read_u32(np, "read,read-requests",
+	if (of_property_read_u32(np, "snps,read-requests",
 				 &plat_dat->axi->axi_rd_osr_lmt)) {
 		/**
 		 * Since the register has a reset value of 1, if property
-- 
cgit v1.2.3


From 6d9f0790af8d33476f936ac84a07cac42f808f6c Mon Sep 17 00:00:00 2001
From: Jose Abreu <Jose.Abreu@synopsys.com>
Date: Thu, 26 Oct 2017 10:07:12 +0100
Subject: net: stmmac: First Queue must always be in DCB mode

According to DWMAC databook the first queue operating mode
must always be in DCB.

As MTL_QUEUE_DCB = 1, we need to always set the first queue
operating mode to DCB otherwise driver will think that queue
is in AVB mode (because MTL_QUEUE_AVB = 0).

Signed-off-by: Jose Abreu <joabreu@synopsys.com>
Cc: Joao Pinto <jpinto@synopsys.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Cc: Alexandre Torgue <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'drivers/net/ethernet')

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 8a280b48e3a9..6383695004a5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -150,6 +150,13 @@ static void stmmac_mtl_setup(struct platform_device *pdev,
 	plat->rx_queues_to_use = 1;
 	plat->tx_queues_to_use = 1;
 
+	/* First Queue must always be in DCB mode. As MTL_QUEUE_DCB = 1 we need
+	 * to always set this, otherwise Queue will be classified as AVB
+	 * (because MTL_QUEUE_AVB = 0).
+	 */
+	plat->rx_queues_cfg[0].mode_to_use = MTL_QUEUE_DCB;
+	plat->tx_queues_cfg[0].mode_to_use = MTL_QUEUE_DCB;
+
 	rx_node = of_parse_phandle(pdev->dev.of_node, "snps,mtl-rx-config", 0);
 	if (!rx_node)
 		return;
-- 
cgit v1.2.3