summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/ti
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-01-07 20:48:16 -0500
committerDavid S. Miller <davem@davemloft.net>2017-01-07 20:48:16 -0500
commitb14ad90c9926d841b0478107ae25fa9fb46936d2 (patch)
treea2bac7db0c432f868c580f5db28a68f01887304e /drivers/net/ethernet/ti
parent350a47189c7ab5fdc2e3af855585b99514114d72 (diff)
parentc40d8883a28ece32d753d96e77f05e5e9a7c4415 (diff)
downloadlinux-b14ad90c9926d841b0478107ae25fa9fb46936d2.tar.bz2
Merge branch 'cpsw-cpdma-DDR'
Grygorii Strashko says: ==================== net: ethernet: ti: cpsw: support placing CPDMA descriptors into DDR This series intended to add support for placing CPDMA descriptors into DDR by introducing new module parameter "descs_pool_size" to specify size of descriptor's pool. The "descs_pool_size" defines total number of CPDMA CPPI descriptors to be used for both ingress/egress packets processing. If not specified - the default value 256 will be used which will allow to place descriptor's pool into the internal CPPI RAM. In addition, added ability to re-split CPDMA pool of descriptors between RX and TX path via ethtool '-G' command wich will allow to configure and fix number of descriptors used by RX and TX path, which, then, will be split between RX/TX channels proportionally depending on number of RX/TX channels and its weight. This allows significantly to reduce UDP packets drop rate for bandwidth >301 Mbits/sec (am57x). Before enabling this feature, the am437x SoC has to be fixed as it's proved that it's not working when CPDMA descriptors placed in DDR. So, the patch 1 fixes this issue. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/ti')
-rw-r--r--drivers/net/ethernet/ti/cpsw.c98
-rw-r--r--drivers/net/ethernet/ti/davinci_cpdma.c161
-rw-r--r--drivers/net/ethernet/ti/davinci_cpdma.h5
3 files changed, 199 insertions, 65 deletions
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index b203143647e6..f339268da11a 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -145,6 +145,7 @@ do { \
cpsw->data.active_slave)
#define IRQ_NUM 2
#define CPSW_MAX_QUEUES 8
+#define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256
static int debug_level;
module_param(debug_level, int, 0);
@@ -158,6 +159,10 @@ static int rx_packet_max = CPSW_MAX_PACKET_SIZE;
module_param(rx_packet_max, int, 0);
MODULE_PARM_DESC(rx_packet_max, "maximum receive packet size (bytes)");
+static int descs_pool_size = CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT;
+module_param(descs_pool_size, int, 0444);
+MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool");
+
struct cpsw_wr_regs {
u32 id_ver;
u32 soft_reset;
@@ -2479,6 +2484,90 @@ static int cpsw_nway_reset(struct net_device *ndev)
return -EOPNOTSUPP;
}
+static void cpsw_get_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ering)
+{
+ struct cpsw_priv *priv = netdev_priv(ndev);
+ struct cpsw_common *cpsw = priv->cpsw;
+
+ /* not supported */
+ ering->tx_max_pending = 0;
+ ering->tx_pending = cpdma_get_num_tx_descs(cpsw->dma);
+ /* Max 90% RX buffers */
+ ering->rx_max_pending = (descs_pool_size * 9) / 10;
+ ering->rx_pending = cpdma_get_num_rx_descs(cpsw->dma);
+}
+
+static int cpsw_set_ringparam(struct net_device *ndev,
+ struct ethtool_ringparam *ering)
+{
+ struct cpsw_priv *priv = netdev_priv(ndev);
+ struct cpsw_common *cpsw = priv->cpsw;
+ struct cpsw_slave *slave;
+ int i, ret;
+
+ /* ignore ering->tx_pending - only rx_pending adjustment is supported */
+
+ if (ering->rx_mini_pending || ering->rx_jumbo_pending ||
+ ering->rx_pending < (descs_pool_size / 10) ||
+ ering->rx_pending > ((descs_pool_size * 9) / 10))
+ return -EINVAL;
+
+ if (ering->rx_pending == cpdma_get_num_rx_descs(cpsw->dma))
+ return 0;
+
+ /* Disable NAPI scheduling */
+ cpsw_intr_disable(cpsw);
+
+ /* Stop all transmit queues for every network device.
+ * Disable re-using rx descriptors with dormant_on.
+ */
+ for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
+ if (!(slave->ndev && netif_running(slave->ndev)))
+ continue;
+
+ netif_tx_stop_all_queues(slave->ndev);
+ netif_dormant_on(slave->ndev);
+ }
+
+ /* Handle rest of tx packets and stop cpdma channels */
+ cpdma_ctlr_stop(cpsw->dma);
+
+ cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending);
+
+ for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
+ if (!(slave->ndev && netif_running(slave->ndev)))
+ continue;
+
+ /* Enable rx packets handling */
+ netif_dormant_off(slave->ndev);
+ }
+
+ if (cpsw_common_res_usage_state(cpsw)) {
+ cpdma_chan_split_pool(cpsw->dma);
+
+ ret = cpsw_fill_rx_channels(priv);
+ if (ret)
+ goto err;
+
+ /* After this receive is started */
+ cpdma_ctlr_start(cpsw->dma);
+ cpsw_intr_enable(cpsw);
+ }
+
+ /* Resume transmit for every affected interface */
+ for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) {
+ if (!(slave->ndev && netif_running(slave->ndev)))
+ continue;
+ netif_tx_start_all_queues(slave->ndev);
+ }
+ return 0;
+err:
+ dev_err(priv->dev, "cannot set ring params, closing device\n");
+ dev_close(ndev);
+ return ret;
+}
+
static const struct ethtool_ops cpsw_ethtool_ops = {
.get_drvinfo = cpsw_get_drvinfo,
.get_msglevel = cpsw_get_msglevel,
@@ -2505,6 +2594,8 @@ static const struct ethtool_ops cpsw_ethtool_ops = {
.get_eee = cpsw_get_eee,
.set_eee = cpsw_set_eee,
.nway_reset = cpsw_nway_reset,
+ .get_ringparam = cpsw_get_ringparam,
+ .set_ringparam = cpsw_set_ringparam,
};
static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw,
@@ -2969,6 +3060,7 @@ static int cpsw_probe(struct platform_device *pdev)
dma_params.has_ext_regs = true;
dma_params.desc_hw_addr = dma_params.desc_mem_phys;
dma_params.bus_freq_mhz = cpsw->bus_freq_mhz;
+ dma_params.descs_pool_size = descs_pool_size;
cpsw->dma = cpdma_ctlr_create(&dma_params);
if (!cpsw->dma) {
@@ -3072,9 +3164,9 @@ static int cpsw_probe(struct platform_device *pdev)
goto clean_ale_ret;
}
- cpsw_notice(priv, probe, "initialized device (regs %pa, irq %d)\n",
- &ss_res->start, ndev->irq);
-
+ cpsw_notice(priv, probe,
+ "initialized device (regs %pa, irq %d, pool size %d)\n",
+ &ss_res->start, ndev->irq, dma_params.descs_pool_size);
if (cpsw->data.dual_emac) {
ret = cpsw_probe_dual_emac(priv);
if (ret) {
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index 36518fc5c7cc..d80bff19d4ec 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -108,6 +108,8 @@ struct cpdma_ctlr {
spinlock_t lock;
struct cpdma_chan *channels[2 * CPDMA_MAX_CHANNELS];
int chan_num;
+ int num_rx_desc; /* RX descriptors number */
+ int num_tx_desc; /* TX descriptors number */
};
struct cpdma_chan {
@@ -166,12 +168,12 @@ static struct cpdma_control_info controls[] = {
#define num_chan params.num_chan
/* various accessors */
-#define dma_reg_read(ctlr, ofs) __raw_readl((ctlr)->dmaregs + (ofs))
-#define chan_read(chan, fld) __raw_readl((chan)->fld)
-#define desc_read(desc, fld) __raw_readl(&(desc)->fld)
-#define dma_reg_write(ctlr, ofs, v) __raw_writel(v, (ctlr)->dmaregs + (ofs))
-#define chan_write(chan, fld, v) __raw_writel(v, (chan)->fld)
-#define desc_write(desc, fld, v) __raw_writel((u32)(v), &(desc)->fld)
+#define dma_reg_read(ctlr, ofs) readl((ctlr)->dmaregs + (ofs))
+#define chan_read(chan, fld) readl((chan)->fld)
+#define desc_read(desc, fld) readl(&(desc)->fld)
+#define dma_reg_write(ctlr, ofs, v) writel(v, (ctlr)->dmaregs + (ofs))
+#define chan_write(chan, fld, v) writel(v, (chan)->fld)
+#define desc_write(desc, fld, v) writel((u32)(v), &(desc)->fld)
#define cpdma_desc_to_port(chan, mode, directed) \
do { \
@@ -181,8 +183,10 @@ static struct cpdma_control_info controls[] = {
(directed << CPDMA_TO_PORT_SHIFT)); \
} while (0)
-static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool)
+static void cpdma_desc_pool_destroy(struct cpdma_ctlr *ctlr)
{
+ struct cpdma_desc_pool *pool = ctlr->pool;
+
if (!pool)
return;
@@ -191,10 +195,8 @@ static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool)
gen_pool_size(pool->gen_pool),
gen_pool_avail(pool->gen_pool));
if (pool->cpumap)
- dma_free_coherent(pool->dev, pool->mem_size, pool->cpumap,
+ dma_free_coherent(ctlr->dev, pool->mem_size, pool->cpumap,
pool->phys);
- else
- iounmap(pool->iomap);
}
/*
@@ -203,37 +205,50 @@ static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool)
* devices (e.g. cpsw switches) use plain old memory. Descriptor pools
* abstract out these details
*/
-static struct cpdma_desc_pool *
-cpdma_desc_pool_create(struct device *dev, u32 phys, dma_addr_t hw_addr,
- int size, int align)
+int cpdma_desc_pool_create(struct cpdma_ctlr *ctlr)
{
+ struct cpdma_params *cpdma_params = &ctlr->params;
struct cpdma_desc_pool *pool;
- int ret;
+ int ret = -ENOMEM;
- pool = devm_kzalloc(dev, sizeof(*pool), GFP_KERNEL);
+ pool = devm_kzalloc(ctlr->dev, sizeof(*pool), GFP_KERNEL);
if (!pool)
goto gen_pool_create_fail;
+ ctlr->pool = pool;
+
+ pool->mem_size = cpdma_params->desc_mem_size;
+ pool->desc_size = ALIGN(sizeof(struct cpdma_desc),
+ cpdma_params->desc_align);
+ pool->num_desc = pool->mem_size / pool->desc_size;
+
+ if (cpdma_params->descs_pool_size) {
+ /* recalculate memory size required cpdma descriptor pool
+ * basing on number of descriptors specified by user and
+ * if memory size > CPPI internal RAM size (desc_mem_size)
+ * then switch to use DDR
+ */
+ pool->num_desc = cpdma_params->descs_pool_size;
+ pool->mem_size = pool->desc_size * pool->num_desc;
+ if (pool->mem_size > cpdma_params->desc_mem_size)
+ cpdma_params->desc_mem_phys = 0;
+ }
- pool->dev = dev;
- pool->mem_size = size;
- pool->desc_size = ALIGN(sizeof(struct cpdma_desc), align);
- pool->num_desc = size / pool->desc_size;
-
- pool->gen_pool = devm_gen_pool_create(dev, ilog2(pool->desc_size), -1,
- "cpdma");
+ pool->gen_pool = devm_gen_pool_create(ctlr->dev, ilog2(pool->desc_size),
+ -1, "cpdma");
if (IS_ERR(pool->gen_pool)) {
- dev_err(dev, "pool create failed %ld\n",
- PTR_ERR(pool->gen_pool));
+ ret = PTR_ERR(pool->gen_pool);
+ dev_err(ctlr->dev, "pool create failed %d\n", ret);
goto gen_pool_create_fail;
}
- if (phys) {
- pool->phys = phys;
- pool->iomap = ioremap(phys, size); /* should be memremap? */
- pool->hw_addr = hw_addr;
+ if (cpdma_params->desc_mem_phys) {
+ pool->phys = cpdma_params->desc_mem_phys;
+ pool->iomap = devm_ioremap(ctlr->dev, pool->phys,
+ pool->mem_size);
+ pool->hw_addr = cpdma_params->desc_hw_addr;
} else {
- pool->cpumap = dma_alloc_coherent(dev, size, &pool->hw_addr,
- GFP_KERNEL);
+ pool->cpumap = dma_alloc_coherent(ctlr->dev, pool->mem_size,
+ &pool->hw_addr, GFP_KERNEL);
pool->iomap = (void __iomem __force *)pool->cpumap;
pool->phys = pool->hw_addr; /* assumes no IOMMU, don't use this value */
}
@@ -244,16 +259,17 @@ cpdma_desc_pool_create(struct device *dev, u32 phys, dma_addr_t hw_addr,
ret = gen_pool_add_virt(pool->gen_pool, (unsigned long)pool->iomap,
pool->phys, pool->mem_size, -1);
if (ret < 0) {
- dev_err(dev, "pool add failed %d\n", ret);
+ dev_err(ctlr->dev, "pool add failed %d\n", ret);
goto gen_pool_add_virt_fail;
}
- return pool;
+ return 0;
gen_pool_add_virt_fail:
- cpdma_desc_pool_destroy(pool);
+ cpdma_desc_pool_destroy(ctlr);
gen_pool_create_fail:
- return NULL;
+ ctlr->pool = NULL;
+ return ret;
}
static inline dma_addr_t desc_phys(struct cpdma_desc_pool *pool,
@@ -502,13 +518,11 @@ struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params)
ctlr->chan_num = 0;
spin_lock_init(&ctlr->lock);
- ctlr->pool = cpdma_desc_pool_create(ctlr->dev,
- ctlr->params.desc_mem_phys,
- ctlr->params.desc_hw_addr,
- ctlr->params.desc_mem_size,
- ctlr->params.desc_align);
- if (!ctlr->pool)
+ if (cpdma_desc_pool_create(ctlr))
return NULL;
+ /* split pool equally between RX/TX by default */
+ ctlr->num_tx_desc = ctlr->pool->num_desc / 2;
+ ctlr->num_rx_desc = ctlr->pool->num_desc - ctlr->num_tx_desc;
if (WARN_ON(ctlr->num_chan > CPDMA_MAX_CHANNELS))
ctlr->num_chan = CPDMA_MAX_CHANNELS;
@@ -542,10 +556,10 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr)
}
for (i = 0; i < ctlr->num_chan; i++) {
- __raw_writel(0, ctlr->params.txhdp + 4 * i);
- __raw_writel(0, ctlr->params.rxhdp + 4 * i);
- __raw_writel(0, ctlr->params.txcp + 4 * i);
- __raw_writel(0, ctlr->params.rxcp + 4 * i);
+ writel(0, ctlr->params.txhdp + 4 * i);
+ writel(0, ctlr->params.rxhdp + 4 * i);
+ writel(0, ctlr->params.txcp + 4 * i);
+ writel(0, ctlr->params.rxcp + 4 * i);
}
dma_reg_write(ctlr, CPDMA_RXINTMASKCLEAR, 0xffffffff);
@@ -623,7 +637,7 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr)
for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++)
cpdma_chan_destroy(ctlr->channels[i]);
- cpdma_desc_pool_destroy(ctlr->pool);
+ cpdma_desc_pool_destroy(ctlr);
return ret;
}
EXPORT_SYMBOL_GPL(cpdma_ctlr_destroy);
@@ -708,22 +722,22 @@ static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr,
}
}
/* use remains */
- most_chan->desc_num += desc_cnt;
+ if (most_chan)
+ most_chan->desc_num += desc_cnt;
}
/**
* cpdma_chan_split_pool - Splits ctrl pool between all channels.
* Has to be called under ctlr lock
*/
-static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
+int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
{
int tx_per_ch_desc = 0, rx_per_ch_desc = 0;
- struct cpdma_desc_pool *pool = ctlr->pool;
int free_rx_num = 0, free_tx_num = 0;
int rx_weight = 0, tx_weight = 0;
int tx_desc_num, rx_desc_num;
struct cpdma_chan *chan;
- int i, tx_num = 0;
+ int i;
if (!ctlr->chan_num)
return 0;
@@ -741,15 +755,14 @@ static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
if (!chan->weight)
free_tx_num++;
tx_weight += chan->weight;
- tx_num++;
}
}
if (rx_weight > 100 || tx_weight > 100)
return -EINVAL;
- tx_desc_num = (tx_num * pool->num_desc) / ctlr->chan_num;
- rx_desc_num = pool->num_desc - tx_desc_num;
+ tx_desc_num = ctlr->num_tx_desc;
+ rx_desc_num = ctlr->num_rx_desc;
if (free_tx_num) {
tx_per_ch_desc = tx_desc_num - (tx_weight * tx_desc_num) / 100;
@@ -765,6 +778,8 @@ static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
return 0;
}
+EXPORT_SYMBOL_GPL(cpdma_chan_split_pool);
+
/* cpdma_chan_set_weight - set weight of a channel in percentage.
* Tx and Rx channels have separate weights. That is 100% for RX
@@ -898,7 +913,6 @@ struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
chan->chan_num = chan_num;
chan->handler = handler;
chan->rate = 0;
- chan->desc_num = ctlr->pool->num_desc / 2;
chan->weight = 0;
if (is_rx_chan(chan)) {
@@ -1061,13 +1075,17 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data,
mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP;
cpdma_desc_to_port(chan, mode, directed);
- desc_write(desc, hw_next, 0);
- desc_write(desc, hw_buffer, buffer);
- desc_write(desc, hw_len, len);
- desc_write(desc, hw_mode, mode | len);
- desc_write(desc, sw_token, token);
- desc_write(desc, sw_buffer, buffer);
- desc_write(desc, sw_len, len);
+ /* Relaxed IO accessors can be used here as there is read barrier
+ * at the end of write sequence.
+ */
+ writel_relaxed(0, &desc->hw_next);
+ writel_relaxed(buffer, &desc->hw_buffer);
+ writel_relaxed(len, &desc->hw_len);
+ writel_relaxed(mode | len, &desc->hw_mode);
+ writel_relaxed(token, &desc->sw_token);
+ writel_relaxed(buffer, &desc->sw_buffer);
+ writel_relaxed(len, &desc->sw_len);
+ desc_read(desc, sw_len);
__cpdma_chan_submit(chan, desc);
@@ -1136,7 +1154,7 @@ static int __cpdma_chan_process(struct cpdma_chan *chan)
}
desc_dma = desc_phys(pool, desc);
- status = __raw_readl(&desc->hw_mode);
+ status = desc_read(desc, hw_mode);
outlen = status & 0x7ff;
if (status & CPDMA_DESC_OWNER) {
chan->stats.busy_dequeue++;
@@ -1155,7 +1173,7 @@ static int __cpdma_chan_process(struct cpdma_chan *chan)
chan->count--;
chan->stats.good_dequeue++;
- if (status & CPDMA_DESC_EOQ) {
+ if ((status & CPDMA_DESC_EOQ) && chan->head) {
chan->stats.requeue++;
chan_write(chan, hdp, desc_phys(pool, chan->head));
}
@@ -1316,4 +1334,23 @@ int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value)
}
EXPORT_SYMBOL_GPL(cpdma_control_set);
+int cpdma_get_num_rx_descs(struct cpdma_ctlr *ctlr)
+{
+ return ctlr->num_rx_desc;
+}
+EXPORT_SYMBOL_GPL(cpdma_get_num_rx_descs);
+
+int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr)
+{
+ return ctlr->num_tx_desc;
+}
+EXPORT_SYMBOL_GPL(cpdma_get_num_tx_descs);
+
+void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc)
+{
+ ctlr->num_rx_desc = num_rx_desc;
+ ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc;
+}
+EXPORT_SYMBOL_GPL(cpdma_set_num_rx_descs);
+
MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h
index 4a167db2abab..fd65ce2b83de 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.h
+++ b/drivers/net/ethernet/ti/davinci_cpdma.h
@@ -37,6 +37,7 @@ struct cpdma_params {
int desc_mem_size;
int desc_align;
u32 bus_freq_mhz;
+ u32 descs_pool_size;
/*
* Some instances of embedded cpdma controllers have extra control and
@@ -113,5 +114,9 @@ enum cpdma_control {
int cpdma_control_get(struct cpdma_ctlr *ctlr, int control);
int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value);
+int cpdma_get_num_rx_descs(struct cpdma_ctlr *ctlr);
+void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc);
+int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr);
+int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr);
#endif