summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Jakub Kicinski <kuba@kernel.org> 2020-10-13 16:57:18 -0700
committer Jakub Kicinski <kuba@kernel.org> 2020-10-13 16:57:18 -0700
commit c93c5482c7d4132e84ee7a9e8d831c00ed7aec41 (patch)
tree 9688ac73260eb4cf890da91e3e0e4cfab7e91aa6
parent ccdf7fae3afaeaf0e5dd03311b86ffa56adf85ae (diff)
parent 0a4e9ce17ba77847e5a9f87eed3c0ba46e3f82eb (diff)
download linux-c93c5482c7d4132e84ee7a9e8d831c00ed7aec41.tar.bz2
Merge branch 'macb-support-the-2-deep-Tx-queue-on-at91'
Willy Tarreau says:

====================
macb: support the 2-deep Tx queue on at91

While running some tests on my Breadbee board, I noticed poor network Tx performance. I had a look at the driver (macb, at91ether variant) and noticed that at91ether_start_xmit() immediately stops the queue after sending a frame and waits for the interrupt to restart the queue, causing a dead time after each packet is sent.

The AT91RM9200 datasheet states that the controller supports two frames, one being sent and the other one being queued, so I performed minimal changes to support this. The transmit performance on my board has increased by 50% on medium-sized packets (HTTP traffic), and with large packets I can now reach line rate.

Since this driver is shared by various platforms, I tried my best to isolate and limit the changes as much as possible and I think it's pretty reasonable as-is. I've run extensive tests and did not encounter any unexpected situation (no stall, overflow nor lockup).

There are 3 patches in this series. The first one adds the missing interrupt flag for RM9200 (TBRE, indicating the Tx buffer is ready to take a new packet). The second one replaces the single skb with a two-entry array and uses only index 0. It makes no other change; it only prepares the code for the third one. The third one implements the queue. Packets are added at the tail of the queue, the queue is stopped at 2 packets, and the interrupt releases 0, 1 or 2 packets depending on what the transmit status register reports.
====================

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- drivers/net/ethernet/cadence/macb.h 10
-rw-r--r-- drivers/net/ethernet/cadence/macb_main.c 66
2 files changed, 56 insertions, 20 deletions
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 3fd5c6cc23af..5de47f6fde5a 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -366,6 +366,8 @@
#define MACB_ISR_RLE_SIZE 1
#define MACB_TXERR_OFFSET 6 /* EN TX frame corrupt from error interrupt */
#define MACB_TXERR_SIZE 1
+#define MACB_RM9200_TBRE_OFFSET 6 /* EN may send new frame interrupt (RM9200) */
+#define MACB_RM9200_TBRE_SIZE 1
#define MACB_TCOMP_OFFSET 7 /* Enable transmit complete interrupt */
#define MACB_TCOMP_SIZE 1
#define MACB_ISR_LINK_OFFSET 9 /* Enable link change interrupt */
@@ -1205,10 +1207,10 @@ struct macb {
phy_interface_t phy_interface;
- /* AT91RM9200 transmit */
- struct sk_buff *skb; /* holds skb until xmit interrupt completes */
- dma_addr_t skb_physaddr; /* phys addr from pci_map_single */
- int skb_length; /* saved skb length for pci_unmap_single */
+ /* AT91RM9200 transmit queue (1 on wire + 1 queued) */
+ struct macb_tx_skb rm9200_txq[2];
+ unsigned int rm9200_tx_tail;
+ unsigned int rm9200_tx_len;
unsigned int max_tx_length;
u64 ethtool_stats[GEM_STATS_LEN + QUEUE_STATS_LEN * MACB_MAX_QUEUES];
diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
index 4b42b2d6398c..883e47c5b1a7 100644
--- a/drivers/net/ethernet/cadence/macb_main.c
+++ b/drivers/net/ethernet/cadence/macb_main.c
@@ -3908,6 +3908,7 @@ static int at91ether_start(struct macb *lp)
MACB_BIT(ISR_TUND) |
MACB_BIT(ISR_RLE) |
MACB_BIT(TCOMP) |
+ MACB_BIT(RM9200_TBRE) |
MACB_BIT(ISR_ROVR) |
MACB_BIT(HRESP));
@@ -3924,6 +3925,7 @@ static void at91ether_stop(struct macb *lp)
MACB_BIT(ISR_TUND) |
MACB_BIT(ISR_RLE) |
MACB_BIT(TCOMP) |
+ MACB_BIT(RM9200_TBRE) |
MACB_BIT(ISR_ROVR) |
MACB_BIT(HRESP));
@@ -3993,24 +3995,34 @@ static netdev_tx_t at91ether_start_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct macb *lp = netdev_priv(dev);
+ unsigned long flags;
- if (macb_readl(lp, TSR) & MACB_BIT(RM9200_BNQ)) {
- netif_stop_queue(dev);
+ if (lp->rm9200_tx_len < 2) {
+ int desc = lp->rm9200_tx_tail;
/* Store packet information (to free when Tx completed) */
- lp->skb = skb;
- lp->skb_length = skb->len;
- lp->skb_physaddr = dma_map_single(&lp->pdev->dev, skb->data,
- skb->len, DMA_TO_DEVICE);
- if (dma_mapping_error(&lp->pdev->dev, lp->skb_physaddr)) {
+ lp->rm9200_txq[desc].skb = skb;
+ lp->rm9200_txq[desc].size = skb->len;
+ lp->rm9200_txq[desc].mapping = dma_map_single(&lp->pdev->dev, skb->data,
+ skb->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&lp->pdev->dev, lp->rm9200_txq[desc].mapping)) {
dev_kfree_skb_any(skb);
dev->stats.tx_dropped++;
netdev_err(dev, "%s: DMA mapping error\n", __func__);
return NETDEV_TX_OK;
}
+ spin_lock_irqsave(&lp->lock, flags);
+
+ lp->rm9200_tx_tail = (desc + 1) & 1;
+ lp->rm9200_tx_len++;
+ if (lp->rm9200_tx_len > 1)
+ netif_stop_queue(dev);
+
+ spin_unlock_irqrestore(&lp->lock, flags);
+
/* Set address of the data in the Transmit Address register */
- macb_writel(lp, TAR, lp->skb_physaddr);
+ macb_writel(lp, TAR, lp->rm9200_txq[desc].mapping);
/* Set length of the packet in the Transmit Control register */
macb_writel(lp, TCR, skb->len);
@@ -4073,6 +4085,9 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
struct net_device *dev = dev_id;
struct macb *lp = netdev_priv(dev);
u32 intstatus, ctl;
+ unsigned int desc;
+ unsigned int qlen;
+ u32 tsr;
/* MAC Interrupt Status register indicates what interrupts are pending.
* It is automatically cleared once read.
@@ -4084,20 +4099,39 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
at91ether_rx(dev);
/* Transmit complete */
- if (intstatus & MACB_BIT(TCOMP)) {
+ if (intstatus & (MACB_BIT(TCOMP) | MACB_BIT(RM9200_TBRE))) {
/* The TCOM bit is set even if the transmission failed */
if (intstatus & (MACB_BIT(ISR_TUND) | MACB_BIT(ISR_RLE)))
dev->stats.tx_errors++;
- if (lp->skb) {
- dev_consume_skb_irq(lp->skb);
- lp->skb = NULL;
- dma_unmap_single(&lp->pdev->dev, lp->skb_physaddr,
- lp->skb_length, DMA_TO_DEVICE);
+ spin_lock(&lp->lock);
+
+ tsr = macb_readl(lp, TSR);
+
+ /* we have three possibilities here:
+ * - all pending packets transmitted (TGO, implies BNQ)
+ * - only first packet transmitted (!TGO && BNQ)
+ * - two frames pending (!TGO && !BNQ)
+ * Note that TGO ("transmit go") is called "IDLE" on RM9200.
+ */
+ qlen = (tsr & MACB_BIT(TGO)) ? 0 :
+ (tsr & MACB_BIT(RM9200_BNQ)) ? 1 : 2;
+
+ while (lp->rm9200_tx_len > qlen) {
+ desc = (lp->rm9200_tx_tail - lp->rm9200_tx_len) & 1;
+ dev_consume_skb_irq(lp->rm9200_txq[desc].skb);
+ lp->rm9200_txq[desc].skb = NULL;
+ dma_unmap_single(&lp->pdev->dev, lp->rm9200_txq[desc].mapping,
+ lp->rm9200_txq[desc].size, DMA_TO_DEVICE);
dev->stats.tx_packets++;
- dev->stats.tx_bytes += lp->skb_length;
+ dev->stats.tx_bytes += lp->rm9200_txq[desc].size;
+ lp->rm9200_tx_len--;
}
- netif_wake_queue(dev);
+
+ if (lp->rm9200_tx_len < 2 && netif_queue_stopped(dev))
+ netif_wake_queue(dev);
+
+ spin_unlock(&lp->lock);
}
/* Work-around for EMAC Errata section 41.3.1 */