diff options
author | David S. Miller <davem@davemloft.net> | 2017-01-07 20:48:16 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2017-01-07 20:48:16 -0500 |
commit | b14ad90c9926d841b0478107ae25fa9fb46936d2 (patch) | |
tree | a2bac7db0c432f868c580f5db28a68f01887304e /drivers/net/ethernet/ti | |
parent | 350a47189c7ab5fdc2e3af855585b99514114d72 (diff) | |
parent | c40d8883a28ece32d753d96e77f05e5e9a7c4415 (diff) | |
download | linux-b14ad90c9926d841b0478107ae25fa9fb46936d2.tar.bz2 |
Merge branch 'cpsw-cpdma-DDR'
Grygorii Strashko says:
====================
net: ethernet: ti: cpsw: support placing CPDMA descriptors into DDR
This series intended to add support for placing CPDMA descriptors into
DDR by introducing new module parameter "descs_pool_size" to specify
size of descriptor's pool. The "descs_pool_size" defines total number
of CPDMA CPPI descriptors to be used for both ingress/egress packets
processing. If not specified - the default value 256 will be used
which will allow to place descriptor's pool into the internal CPPI
RAM.
In addition, added ability to re-split CPDMA pool of descriptors
between RX and TX path via ethtool '-G' command wich will allow to
configure and fix number of descriptors used by RX and TX path, which,
then, will be split between RX/TX channels proportionally depending on
number of RX/TX channels and its weight.
This allows significantly to reduce UDP packets drop rate for
bandwidth >301 Mbits/sec (am57x).
Before enabling this feature, the am437x SoC has to be fixed as it's
proved that it's not working when CPDMA descriptors placed in DDR.
So, the patch 1 fixes this issue.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/ti')
-rw-r--r-- | drivers/net/ethernet/ti/cpsw.c | 98 | ||||
-rw-r--r-- | drivers/net/ethernet/ti/davinci_cpdma.c | 161 | ||||
-rw-r--r-- | drivers/net/ethernet/ti/davinci_cpdma.h | 5 |
3 files changed, 199 insertions, 65 deletions
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index b203143647e6..f339268da11a 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -145,6 +145,7 @@ do { \ cpsw->data.active_slave) #define IRQ_NUM 2 #define CPSW_MAX_QUEUES 8 +#define CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT 256 static int debug_level; module_param(debug_level, int, 0); @@ -158,6 +159,10 @@ static int rx_packet_max = CPSW_MAX_PACKET_SIZE; module_param(rx_packet_max, int, 0); MODULE_PARM_DESC(rx_packet_max, "maximum receive packet size (bytes)"); +static int descs_pool_size = CPSW_CPDMA_DESCS_POOL_SIZE_DEFAULT; +module_param(descs_pool_size, int, 0444); +MODULE_PARM_DESC(descs_pool_size, "Number of CPDMA CPPI descriptors in pool"); + struct cpsw_wr_regs { u32 id_ver; u32 soft_reset; @@ -2479,6 +2484,90 @@ static int cpsw_nway_reset(struct net_device *ndev) return -EOPNOTSUPP; } +static void cpsw_get_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ering) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; + + /* not supported */ + ering->tx_max_pending = 0; + ering->tx_pending = cpdma_get_num_tx_descs(cpsw->dma); + /* Max 90% RX buffers */ + ering->rx_max_pending = (descs_pool_size * 9) / 10; + ering->rx_pending = cpdma_get_num_rx_descs(cpsw->dma); +} + +static int cpsw_set_ringparam(struct net_device *ndev, + struct ethtool_ringparam *ering) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_slave *slave; + int i, ret; + + /* ignore ering->tx_pending - only rx_pending adjustment is supported */ + + if (ering->rx_mini_pending || ering->rx_jumbo_pending || + ering->rx_pending < (descs_pool_size / 10) || + ering->rx_pending > ((descs_pool_size * 9) / 10)) + return -EINVAL; + + if (ering->rx_pending == cpdma_get_num_rx_descs(cpsw->dma)) + return 0; + + /* Disable NAPI scheduling */ + cpsw_intr_disable(cpsw); + + /* Stop all transmit queues for every network device. + * Disable re-using rx descriptors with dormant_on. + */ + for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { + if (!(slave->ndev && netif_running(slave->ndev))) + continue; + + netif_tx_stop_all_queues(slave->ndev); + netif_dormant_on(slave->ndev); + } + + /* Handle rest of tx packets and stop cpdma channels */ + cpdma_ctlr_stop(cpsw->dma); + + cpdma_set_num_rx_descs(cpsw->dma, ering->rx_pending); + + for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { + if (!(slave->ndev && netif_running(slave->ndev))) + continue; + + /* Enable rx packets handling */ + netif_dormant_off(slave->ndev); + } + + if (cpsw_common_res_usage_state(cpsw)) { + cpdma_chan_split_pool(cpsw->dma); + + ret = cpsw_fill_rx_channels(priv); + if (ret) + goto err; + + /* After this receive is started */ + cpdma_ctlr_start(cpsw->dma); + cpsw_intr_enable(cpsw); + } + + /* Resume transmit for every affected interface */ + for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { + if (!(slave->ndev && netif_running(slave->ndev))) + continue; + netif_tx_start_all_queues(slave->ndev); + } + return 0; +err: + dev_err(priv->dev, "cannot set ring params, closing device\n"); + dev_close(ndev); + return ret; +} + static const struct ethtool_ops cpsw_ethtool_ops = { .get_drvinfo = cpsw_get_drvinfo, .get_msglevel = cpsw_get_msglevel, @@ -2505,6 +2594,8 @@ static const struct ethtool_ops cpsw_ethtool_ops = { .get_eee = cpsw_get_eee, .set_eee = cpsw_set_eee, .nway_reset = cpsw_nway_reset, + .get_ringparam = cpsw_get_ringparam, + .set_ringparam = cpsw_set_ringparam, }; static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw, @@ -2969,6 +3060,7 @@ static int cpsw_probe(struct platform_device *pdev) dma_params.has_ext_regs = true; dma_params.desc_hw_addr = dma_params.desc_mem_phys; dma_params.bus_freq_mhz = cpsw->bus_freq_mhz; + dma_params.descs_pool_size = descs_pool_size; cpsw->dma = cpdma_ctlr_create(&dma_params); if (!cpsw->dma) { @@ -3072,9 +3164,9 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_ale_ret; } - cpsw_notice(priv, probe, "initialized device (regs %pa, irq %d)\n", - &ss_res->start, ndev->irq); - + cpsw_notice(priv, probe, + "initialized device (regs %pa, irq %d, pool size %d)\n", + &ss_res->start, ndev->irq, dma_params.descs_pool_size); if (cpsw->data.dual_emac) { ret = cpsw_probe_dual_emac(priv); if (ret) { diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 36518fc5c7cc..d80bff19d4ec 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -108,6 +108,8 @@ struct cpdma_ctlr { spinlock_t lock; struct cpdma_chan *channels[2 * CPDMA_MAX_CHANNELS]; int chan_num; + int num_rx_desc; /* RX descriptors number */ + int num_tx_desc; /* TX descriptors number */ }; struct cpdma_chan { @@ -166,12 +168,12 @@ static struct cpdma_control_info controls[] = { #define num_chan params.num_chan /* various accessors */ -#define dma_reg_read(ctlr, ofs) __raw_readl((ctlr)->dmaregs + (ofs)) -#define chan_read(chan, fld) __raw_readl((chan)->fld) -#define desc_read(desc, fld) __raw_readl(&(desc)->fld) -#define dma_reg_write(ctlr, ofs, v) __raw_writel(v, (ctlr)->dmaregs + (ofs)) -#define chan_write(chan, fld, v) __raw_writel(v, (chan)->fld) -#define desc_write(desc, fld, v) __raw_writel((u32)(v), &(desc)->fld) +#define dma_reg_read(ctlr, ofs) readl((ctlr)->dmaregs + (ofs)) +#define chan_read(chan, fld) readl((chan)->fld) +#define desc_read(desc, fld) readl(&(desc)->fld) +#define dma_reg_write(ctlr, ofs, v) writel(v, (ctlr)->dmaregs + (ofs)) +#define chan_write(chan, fld, v) writel(v, (chan)->fld) +#define desc_write(desc, fld, v) writel((u32)(v), &(desc)->fld) #define cpdma_desc_to_port(chan, mode, directed) \ do { \ @@ -181,8 +183,10 @@ static struct cpdma_control_info controls[] = { (directed << CPDMA_TO_PORT_SHIFT)); \ } while (0) -static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool) +static void cpdma_desc_pool_destroy(struct cpdma_ctlr *ctlr) { + struct cpdma_desc_pool *pool = ctlr->pool; + if (!pool) return; @@ -191,10 +195,8 @@ static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool) gen_pool_size(pool->gen_pool), gen_pool_avail(pool->gen_pool)); if (pool->cpumap) - dma_free_coherent(pool->dev, pool->mem_size, pool->cpumap, + dma_free_coherent(ctlr->dev, pool->mem_size, pool->cpumap, pool->phys); - else - iounmap(pool->iomap); } /* @@ -203,37 +205,50 @@ static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool) * devices (e.g. cpsw switches) use plain old memory. Descriptor pools * abstract out these details */ -static struct cpdma_desc_pool * -cpdma_desc_pool_create(struct device *dev, u32 phys, dma_addr_t hw_addr, - int size, int align) +int cpdma_desc_pool_create(struct cpdma_ctlr *ctlr) { + struct cpdma_params *cpdma_params = &ctlr->params; struct cpdma_desc_pool *pool; - int ret; + int ret = -ENOMEM; - pool = devm_kzalloc(dev, sizeof(*pool), GFP_KERNEL); + pool = devm_kzalloc(ctlr->dev, sizeof(*pool), GFP_KERNEL); if (!pool) goto gen_pool_create_fail; + ctlr->pool = pool; + + pool->mem_size = cpdma_params->desc_mem_size; + pool->desc_size = ALIGN(sizeof(struct cpdma_desc), + cpdma_params->desc_align); + pool->num_desc = pool->mem_size / pool->desc_size; + + if (cpdma_params->descs_pool_size) { + /* recalculate memory size required cpdma descriptor pool + * basing on number of descriptors specified by user and + * if memory size > CPPI internal RAM size (desc_mem_size) + * then switch to use DDR + */ + pool->num_desc = cpdma_params->descs_pool_size; + pool->mem_size = pool->desc_size * pool->num_desc; + if (pool->mem_size > cpdma_params->desc_mem_size) + cpdma_params->desc_mem_phys = 0; + } - pool->dev = dev; - pool->mem_size = size; - pool->desc_size = ALIGN(sizeof(struct cpdma_desc), align); - pool->num_desc = size / pool->desc_size; - - pool->gen_pool = devm_gen_pool_create(dev, ilog2(pool->desc_size), -1, - "cpdma"); + pool->gen_pool = devm_gen_pool_create(ctlr->dev, ilog2(pool->desc_size), + -1, "cpdma"); if (IS_ERR(pool->gen_pool)) { - dev_err(dev, "pool create failed %ld\n", - PTR_ERR(pool->gen_pool)); + ret = PTR_ERR(pool->gen_pool); + dev_err(ctlr->dev, "pool create failed %d\n", ret); goto gen_pool_create_fail; } - if (phys) { - pool->phys = phys; - pool->iomap = ioremap(phys, size); /* should be memremap? */ - pool->hw_addr = hw_addr; + if (cpdma_params->desc_mem_phys) { + pool->phys = cpdma_params->desc_mem_phys; + pool->iomap = devm_ioremap(ctlr->dev, pool->phys, + pool->mem_size); + pool->hw_addr = cpdma_params->desc_hw_addr; } else { - pool->cpumap = dma_alloc_coherent(dev, size, &pool->hw_addr, - GFP_KERNEL); + pool->cpumap = dma_alloc_coherent(ctlr->dev, pool->mem_size, + &pool->hw_addr, GFP_KERNEL); pool->iomap = (void __iomem __force *)pool->cpumap; pool->phys = pool->hw_addr; /* assumes no IOMMU, don't use this value */ } @@ -244,16 +259,17 @@ cpdma_desc_pool_create(struct device *dev, u32 phys, dma_addr_t hw_addr, ret = gen_pool_add_virt(pool->gen_pool, (unsigned long)pool->iomap, pool->phys, pool->mem_size, -1); if (ret < 0) { - dev_err(dev, "pool add failed %d\n", ret); + dev_err(ctlr->dev, "pool add failed %d\n", ret); goto gen_pool_add_virt_fail; } - return pool; + return 0; gen_pool_add_virt_fail: - cpdma_desc_pool_destroy(pool); + cpdma_desc_pool_destroy(ctlr); gen_pool_create_fail: - return NULL; + ctlr->pool = NULL; + return ret; } static inline dma_addr_t desc_phys(struct cpdma_desc_pool *pool, @@ -502,13 +518,11 @@ struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params) ctlr->chan_num = 0; spin_lock_init(&ctlr->lock); - ctlr->pool = cpdma_desc_pool_create(ctlr->dev, - ctlr->params.desc_mem_phys, - ctlr->params.desc_hw_addr, - ctlr->params.desc_mem_size, - ctlr->params.desc_align); - if (!ctlr->pool) + if (cpdma_desc_pool_create(ctlr)) return NULL; + /* split pool equally between RX/TX by default */ + ctlr->num_tx_desc = ctlr->pool->num_desc / 2; + ctlr->num_rx_desc = ctlr->pool->num_desc - ctlr->num_tx_desc; if (WARN_ON(ctlr->num_chan > CPDMA_MAX_CHANNELS)) ctlr->num_chan = CPDMA_MAX_CHANNELS; @@ -542,10 +556,10 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr) } for (i = 0; i < ctlr->num_chan; i++) { - __raw_writel(0, ctlr->params.txhdp + 4 * i); - __raw_writel(0, ctlr->params.rxhdp + 4 * i); - __raw_writel(0, ctlr->params.txcp + 4 * i); - __raw_writel(0, ctlr->params.rxcp + 4 * i); + writel(0, ctlr->params.txhdp + 4 * i); + writel(0, ctlr->params.rxhdp + 4 * i); + writel(0, ctlr->params.txcp + 4 * i); + writel(0, ctlr->params.rxcp + 4 * i); } dma_reg_write(ctlr, CPDMA_RXINTMASKCLEAR, 0xffffffff); @@ -623,7 +637,7 @@ int cpdma_ctlr_destroy(struct cpdma_ctlr *ctlr) for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) cpdma_chan_destroy(ctlr->channels[i]); - cpdma_desc_pool_destroy(ctlr->pool); + cpdma_desc_pool_destroy(ctlr); return ret; } EXPORT_SYMBOL_GPL(cpdma_ctlr_destroy); @@ -708,22 +722,22 @@ static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr, } } /* use remains */ - most_chan->desc_num += desc_cnt; + if (most_chan) + most_chan->desc_num += desc_cnt; } /** * cpdma_chan_split_pool - Splits ctrl pool between all channels. * Has to be called under ctlr lock */ -static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) +int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) { int tx_per_ch_desc = 0, rx_per_ch_desc = 0; - struct cpdma_desc_pool *pool = ctlr->pool; int free_rx_num = 0, free_tx_num = 0; int rx_weight = 0, tx_weight = 0; int tx_desc_num, rx_desc_num; struct cpdma_chan *chan; - int i, tx_num = 0; + int i; if (!ctlr->chan_num) return 0; @@ -741,15 +755,14 @@ static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) if (!chan->weight) free_tx_num++; tx_weight += chan->weight; - tx_num++; } } if (rx_weight > 100 || tx_weight > 100) return -EINVAL; - tx_desc_num = (tx_num * pool->num_desc) / ctlr->chan_num; - rx_desc_num = pool->num_desc - tx_desc_num; + tx_desc_num = ctlr->num_tx_desc; + rx_desc_num = ctlr->num_rx_desc; if (free_tx_num) { tx_per_ch_desc = tx_desc_num - (tx_weight * tx_desc_num) / 100; @@ -765,6 +778,8 @@ static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) return 0; } +EXPORT_SYMBOL_GPL(cpdma_chan_split_pool); + /* cpdma_chan_set_weight - set weight of a channel in percentage. * Tx and Rx channels have separate weights. That is 100% for RX @@ -898,7 +913,6 @@ struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, chan->chan_num = chan_num; chan->handler = handler; chan->rate = 0; - chan->desc_num = ctlr->pool->num_desc / 2; chan->weight = 0; if (is_rx_chan(chan)) { @@ -1061,13 +1075,17 @@ int cpdma_chan_submit(struct cpdma_chan *chan, void *token, void *data, mode = CPDMA_DESC_OWNER | CPDMA_DESC_SOP | CPDMA_DESC_EOP; cpdma_desc_to_port(chan, mode, directed); - desc_write(desc, hw_next, 0); - desc_write(desc, hw_buffer, buffer); - desc_write(desc, hw_len, len); - desc_write(desc, hw_mode, mode | len); - desc_write(desc, sw_token, token); - desc_write(desc, sw_buffer, buffer); - desc_write(desc, sw_len, len); + /* Relaxed IO accessors can be used here as there is read barrier + * at the end of write sequence. + */ + writel_relaxed(0, &desc->hw_next); + writel_relaxed(buffer, &desc->hw_buffer); + writel_relaxed(len, &desc->hw_len); + writel_relaxed(mode | len, &desc->hw_mode); + writel_relaxed(token, &desc->sw_token); + writel_relaxed(buffer, &desc->sw_buffer); + writel_relaxed(len, &desc->sw_len); + desc_read(desc, sw_len); __cpdma_chan_submit(chan, desc); @@ -1136,7 +1154,7 @@ static int __cpdma_chan_process(struct cpdma_chan *chan) } desc_dma = desc_phys(pool, desc); - status = __raw_readl(&desc->hw_mode); + status = desc_read(desc, hw_mode); outlen = status & 0x7ff; if (status & CPDMA_DESC_OWNER) { chan->stats.busy_dequeue++; @@ -1155,7 +1173,7 @@ static int __cpdma_chan_process(struct cpdma_chan *chan) chan->count--; chan->stats.good_dequeue++; - if (status & CPDMA_DESC_EOQ) { + if ((status & CPDMA_DESC_EOQ) && chan->head) { chan->stats.requeue++; chan_write(chan, hdp, desc_phys(pool, chan->head)); } @@ -1316,4 +1334,23 @@ int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value) } EXPORT_SYMBOL_GPL(cpdma_control_set); +int cpdma_get_num_rx_descs(struct cpdma_ctlr *ctlr) +{ + return ctlr->num_rx_desc; +} +EXPORT_SYMBOL_GPL(cpdma_get_num_rx_descs); + +int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr) +{ + return ctlr->num_tx_desc; +} +EXPORT_SYMBOL_GPL(cpdma_get_num_tx_descs); + +void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc) +{ + ctlr->num_rx_desc = num_rx_desc; + ctlr->num_tx_desc = ctlr->pool->num_desc - ctlr->num_rx_desc; +} +EXPORT_SYMBOL_GPL(cpdma_set_num_rx_descs); + MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h index 4a167db2abab..fd65ce2b83de 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.h +++ b/drivers/net/ethernet/ti/davinci_cpdma.h @@ -37,6 +37,7 @@ struct cpdma_params { int desc_mem_size; int desc_align; u32 bus_freq_mhz; + u32 descs_pool_size; /* * Some instances of embedded cpdma controllers have extra control and @@ -113,5 +114,9 @@ enum cpdma_control { int cpdma_control_get(struct cpdma_ctlr *ctlr, int control); int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value); +int cpdma_get_num_rx_descs(struct cpdma_ctlr *ctlr); +void cpdma_set_num_rx_descs(struct cpdma_ctlr *ctlr, int num_rx_desc); +int cpdma_get_num_tx_descs(struct cpdma_ctlr *ctlr); +int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr); #endif |