summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/intel/ice/ice_txrx.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-09-18 12:34:53 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-09-18 12:34:53 -0700
commit81160dda9a7aad13c04e78bb2cfd3c4630e3afab (patch)
tree4bf79ffa9fc7dc5e2915ff978778c3402c491113 /drivers/net/ethernet/intel/ice/ice_txrx.c
parent8b53c76533aa4356602aea98f98a2f3b4051464c (diff)
parent1bab8d4c488be22d57f9dd09968c90a0ddc413bf (diff)
downloadlinux-81160dda9a7aad13c04e78bb2cfd3c4630e3afab.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller: 1) Support IPV6 RA Captive Portal Identifier, from Maciej Żenczykowski. 2) Use bio_vec in the networking instead of custom skb_frag_t, from Matthew Wilcox. 3) Make use of xmit_more in r8169 driver, from Heiner Kallweit. 4) Add devmap_hash to xdp, from Toke Høiland-Jørgensen. 5) Support all variants of 5750X bnxt_en chips, from Michael Chan. 6) More RTNL avoidance work in the core and mlx5 driver, from Vlad Buslov. 7) Add TCP syn cookies bpf helper, from Petar Penkov. 8) Add 'nettest' to selftests and use it, from David Ahern. 9) Add extack support to drop_monitor, add packet alert mode and support for HW drops, from Ido Schimmel. 10) Add VLAN offload to stmmac, from Jose Abreu. 11) Lots of devm_platform_ioremap_resource() conversions, from YueHaibing. 12) Add IONIC driver, from Shannon Nelson. 13) Several kTLS cleanups, from Jakub Kicinski. * git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1930 commits) mlxsw: spectrum_buffers: Add the ability to query the CPU port's shared buffer mlxsw: spectrum: Register CPU port with devlink mlxsw: spectrum_buffers: Prevent changing CPU port's configuration net: ena: fix incorrect update of intr_delay_resolution net: ena: fix retrieval of nonadaptive interrupt moderation intervals net: ena: fix update of interrupt moderation register net: ena: remove all old adaptive rx interrupt moderation code from ena_com net: ena: remove ena_restore_ethtool_params() and relevant fields net: ena: remove old adaptive interrupt moderation code from ena_netdev net: ena: remove code duplication in ena_com_update_nonadaptive_moderation_interval _*() net: ena: enable the interrupt_moderation in driver_supported_features net: ena: reimplement set/get_coalesce() net: ena: switch to dim algorithm for rx adaptive interrupt moderation net: ena: add intr_moder_rx_interval to struct ena_com_dev and use it net: phy: adin: implement Energy Detect Powerdown mode via phy-tunable ethtool: implement Energy Detect Powerdown support via phy-tunable xen-netfront: do not assume sk_buff_head list is empty in error handling s390/ctcm: Delete unnecessary checks before the macro call “dev_kfree_skb” net: ena: don't wake up tx queue when down drop_monitor: Better sanitize notified packets ...
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_txrx.c')
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c204
1 files changed, 142 insertions, 62 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 3c83230434b6..33dd103035dc 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -95,17 +95,16 @@ void ice_free_tx_ring(struct ice_ring *tx_ring)
/**
* ice_clean_tx_irq - Reclaim resources after transmit completes
- * @vsi: the VSI we care about
* @tx_ring: Tx ring to clean
* @napi_budget: Used to determine if we are in netpoll
*
* Returns true if there's any budget left (e.g. the clean is finished)
*/
-static bool
-ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, int napi_budget)
+static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget)
{
unsigned int total_bytes = 0, total_pkts = 0;
- unsigned int budget = vsi->work_lmt;
+ unsigned int budget = ICE_DFLT_IRQ_WORK;
+ struct ice_vsi *vsi = tx_ring->vsi;
s16 i = tx_ring->next_to_clean;
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_buf;
@@ -114,6 +113,8 @@ ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, int napi_budget)
tx_desc = ICE_TX_DESC(tx_ring, i);
i -= tx_ring->count;
+ prefetch(&vsi->state);
+
do {
struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
@@ -206,7 +207,7 @@ ice_clean_tx_irq(struct ice_vsi *vsi, struct ice_ring *tx_ring, int napi_budget)
smp_mb();
if (__netif_subqueue_stopped(tx_ring->netdev,
tx_ring->q_index) &&
- !test_bit(__ICE_DOWN, vsi->state)) {
+ !test_bit(__ICE_DOWN, vsi->state)) {
netif_wake_subqueue(tx_ring->netdev,
tx_ring->q_index);
++tx_ring->tx_stats.restart_q;
@@ -377,18 +378,28 @@ err:
*/
static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
{
+ u16 prev_ntu = rx_ring->next_to_use;
+
rx_ring->next_to_use = val;
/* update next to alloc since we have filled the ring */
rx_ring->next_to_alloc = val;
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64).
+ /* QRX_TAIL will be updated with any tail value, but hardware ignores
+ * the lower 3 bits. This makes it so we only bump tail on meaningful
+ * boundaries. Also, this allows us to bump tail on intervals of 8 up to
+ * the budget depending on the current traffic load.
*/
- wmb();
- writel(val, rx_ring->tail);
+ val &= ~0x7;
+ if (prev_ntu != val) {
+ /* Force memory writes to complete before letting h/w
+ * know there are new descriptors to fetch. (Only
+ * applicable for weak-ordered memory model archs,
+ * such as IA-64).
+ */
+ wmb();
+ writel(val, rx_ring->tail);
+ }
}
/**
@@ -445,7 +456,13 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi)
* @rx_ring: ring to place buffers on
* @cleaned_count: number of buffers to replace
*
- * Returns false if all allocations were successful, true if any fail
+ * Returns false if all allocations were successful, true if any fail. Returning
+ * true signals to the caller that we didn't replace cleaned_count buffers and
+ * there is more work to do.
+ *
+ * First, try to clean "cleaned_count" Rx buffers. Then refill the cleaned Rx
+ * buffers. Then bump tail at most one time. Grouping like this lets us avoid
+ * multiple tail writes per call.
*/
bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
{
@@ -462,8 +479,9 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
bi = &rx_ring->rx_buf[ntu];
do {
+ /* if we fail here, we have work remaining */
if (!ice_alloc_mapped_page(rx_ring, bi))
- goto no_bufs;
+ break;
/* sync the buffer for use by the device */
dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
@@ -494,16 +512,7 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
if (rx_ring->next_to_use != ntu)
ice_release_rx_desc(rx_ring, ntu);
- return false;
-
-no_bufs:
- if (rx_ring->next_to_use != ntu)
- ice_release_rx_desc(rx_ring, ntu);
-
- /* make sure to come back via polling to try again after
- * allocation failure
- */
- return true;
+ return !!cleaned_count;
}
/**
@@ -599,6 +608,8 @@ ice_add_rx_frag(struct ice_rx_buf *rx_buf, struct sk_buff *skb,
unsigned int truesize = ICE_RXBUF_2048;
#endif
+ if (!size)
+ return;
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buf->page,
rx_buf->page_offset, size, truesize);
@@ -654,6 +665,8 @@ ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb,
prefetchw(rx_buf->page);
*skb = rx_buf->skb;
+ if (!size)
+ return rx_buf;
/* we are reusing so sync this buffer for CPU use */
dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
rx_buf->page_offset, size,
@@ -737,8 +750,11 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
*/
static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
{
- /* hand second half of page back to the ring */
+ if (!rx_buf)
+ return;
+
if (ice_can_reuse_rx_page(rx_buf)) {
+ /* hand second half of page back to the ring */
ice_reuse_rx_page(rx_ring, rx_buf);
rx_ring->rx_stats.page_reuse_count++;
} else {
@@ -864,7 +880,7 @@ ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
/**
* ice_rx_csum - Indicate in skb if checksum is good
- * @vsi: the VSI we care about
+ * @ring: the ring we care about
* @skb: skb currently being received and modified
* @rx_desc: the receive descriptor
* @ptype: the packet type decoded by hardware
@@ -872,7 +888,7 @@ ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
* skb->protocol must be set before this function is called
*/
static void
-ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb,
+ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
{
struct ice_rx_ptype_decoded decoded;
@@ -889,7 +905,7 @@ ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb,
skb_checksum_none_assert(skb);
/* check if Rx checksum is enabled */
- if (!(vsi->netdev->features & NETIF_F_RXCSUM))
+ if (!(ring->netdev->features & NETIF_F_RXCSUM))
return;
/* check if HW has decoded the packet and checksum */
@@ -929,7 +945,7 @@ ice_rx_csum(struct ice_vsi *vsi, struct sk_buff *skb,
return;
checksum_fail:
- vsi->back->hw_csum_rx_error++;
+ ring->vsi->back->hw_csum_rx_error++;
}
/**
@@ -953,7 +969,7 @@ ice_process_skb_fields(struct ice_ring *rx_ring,
/* modifies the skb - consumes the enet header */
skb->protocol = eth_type_trans(skb, rx_ring->netdev);
- ice_rx_csum(rx_ring->vsi, skb, rx_desc, ptype);
+ ice_rx_csum(rx_ring, skb, rx_desc, ptype);
}
/**
@@ -990,7 +1006,7 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
- bool failure = false;
+ bool failure;
/* start the loop to process Rx packets bounded by 'budget' */
while (likely(total_rx_pkts < (unsigned int)budget)) {
@@ -1002,13 +1018,6 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
u16 vlan_tag = 0;
u8 rx_ptype;
- /* return some buffers to hardware, one at a time is too slow */
- if (cleaned_count >= ICE_RX_BUF_WRITE) {
- failure = failure ||
- ice_alloc_rx_bufs(rx_ring, cleaned_count);
- cleaned_count = 0;
- }
-
/* get the Rx desc from Rx ring based on 'next_to_clean' */
rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
@@ -1030,8 +1039,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
size = le16_to_cpu(rx_desc->wb.pkt_len) &
ICE_RX_FLX_DESC_PKT_LEN_M;
+ /* retrieve a buffer from the ring */
rx_buf = ice_get_rx_buf(rx_ring, &skb, size);
- /* allocate (if needed) and populate skb */
+
if (skb)
ice_add_rx_frag(rx_buf, skb, size);
else
@@ -1040,7 +1050,8 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
/* exit if we failed to retrieve a buffer */
if (!skb) {
rx_ring->rx_stats.alloc_buf_failed++;
- rx_buf->pagecnt_bias++;
+ if (rx_buf)
+ rx_buf->pagecnt_bias++;
break;
}
@@ -1057,9 +1068,6 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
continue;
}
- rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
- ICE_RX_FLEX_DESC_PTYPE_M;
-
stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
if (ice_test_staterr(rx_desc, stat_err_bits))
vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
@@ -1076,6 +1084,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
total_rx_bytes += skb->len;
/* populate checksum, VLAN, and protocol */
+ rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
+ ICE_RX_FLEX_DESC_PTYPE_M;
+
ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
/* send completed skb up the stack */
@@ -1085,6 +1096,9 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
total_rx_pkts++;
}
+ /* return up to cleaned_count buffers to hardware */
+ failure = ice_alloc_rx_bufs(rx_ring, cleaned_count);
+
/* update queue and vector specific stats */
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->stats.pkts += total_rx_pkts;
@@ -1212,6 +1226,8 @@ ice_update_itr(struct ice_q_vector *q_vector, struct ice_ring_container *rc)
if (time_after(next_update, rc->next_update))
goto clear_counts;
+ prefetch(q_vector->vsi->port_info);
+
packets = rc->total_pkts;
bytes = rc->total_bytes;
@@ -1341,16 +1357,32 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr)
/**
* ice_update_ena_itr - Update ITR and re-enable MSIX interrupt
- * @vsi: the VSI associated with the q_vector
* @q_vector: q_vector for which ITR is being updated and interrupt enabled
*/
-static void
-ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
+static void ice_update_ena_itr(struct ice_q_vector *q_vector)
{
struct ice_ring_container *tx = &q_vector->tx;
struct ice_ring_container *rx = &q_vector->rx;
+ struct ice_vsi *vsi = q_vector->vsi;
u32 itr_val;
+ /* when exiting WB_ON_ITR lets set a low ITR value and trigger
+ * interrupts to expire right away in case we have more work ready to go
+ * already
+ */
+ if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE) {
+ itr_val = ice_buildreg_itr(rx->itr_idx, ICE_WB_ON_ITR_USECS);
+ wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val);
+ /* set target back to last user set value */
+ rx->target_itr = rx->itr_setting;
+ /* set current to what we just wrote and dynamic if needed */
+ rx->current_itr = ICE_WB_ON_ITR_USECS |
+ (rx->itr_setting & ICE_ITR_DYNAMIC);
+ /* allow normal interrupt flow to start */
+ q_vector->itr_countdown = 0;
+ return;
+ }
+
/* This will do nothing if dynamic updates are not enabled */
ice_update_itr(q_vector, tx);
ice_update_itr(q_vector, rx);
@@ -1389,13 +1421,48 @@ ice_update_ena_itr(struct ice_vsi *vsi, struct ice_q_vector *q_vector)
q_vector->itr_countdown--;
}
- if (!test_bit(__ICE_DOWN, vsi->state))
- wr32(&vsi->back->hw,
+ if (!test_bit(__ICE_DOWN, q_vector->vsi->state))
+ wr32(&q_vector->vsi->back->hw,
GLINT_DYN_CTL(q_vector->reg_idx),
itr_val);
}
/**
+ * ice_set_wb_on_itr - set WB_ON_ITR for this q_vector
+ * @q_vector: q_vector to set WB_ON_ITR on
+ *
+ * We need to tell hardware to write-back completed descriptors even when
+ * interrupts are disabled. Descriptors will be written back on cache line
+ * boundaries without WB_ON_ITR enabled, but if we don't enable WB_ON_ITR
+ * descriptors may not be written back if they don't fill a cache line until the
+ * next interrupt.
+ *
+ * This sets the write-back frequency to 2 microseconds as that is the minimum
+ * value that's not 0 due to ITR granularity. Also, set the INTENA_MSK bit to
+ * make sure hardware knows we aren't meddling with the INTENA_M bit.
+ */
+static void ice_set_wb_on_itr(struct ice_q_vector *q_vector)
+{
+ struct ice_vsi *vsi = q_vector->vsi;
+
+ /* already in WB_ON_ITR mode no need to change it */
+ if (q_vector->itr_countdown == ICE_IN_WB_ON_ITR_MODE)
+ return;
+
+ if (q_vector->num_ring_rx)
+ wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
+ ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
+ ICE_RX_ITR));
+
+ if (q_vector->num_ring_tx)
+ wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx),
+ ICE_GLINT_DYN_CTL_WB_ON_ITR(ICE_WB_ON_ITR_USECS,
+ ICE_TX_ITR));
+
+ q_vector->itr_countdown = ICE_IN_WB_ON_ITR_MODE;
+}
+
+/**
* ice_napi_poll - NAPI polling Rx/Tx cleanup routine
* @napi: napi struct with our devices info in it
* @budget: amount of work driver is allowed to do this pass, in packets
@@ -1408,29 +1475,32 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
{
struct ice_q_vector *q_vector =
container_of(napi, struct ice_q_vector, napi);
- struct ice_vsi *vsi = q_vector->vsi;
- struct ice_pf *pf = vsi->back;
bool clean_complete = true;
- int budget_per_ring = 0;
struct ice_ring *ring;
+ int budget_per_ring;
int work_done = 0;
/* Since the actual Tx work is minimal, we can give the Tx a larger
* budget and be more aggressive about cleaning up the Tx descriptors.
*/
ice_for_each_ring(ring, q_vector->tx)
- if (!ice_clean_tx_irq(vsi, ring, budget))
+ if (!ice_clean_tx_irq(ring, budget))
clean_complete = false;
/* Handle case where we are called by netpoll with a budget of 0 */
- if (budget <= 0)
+ if (unlikely(budget <= 0))
return budget;
- /* We attempt to distribute budget to each Rx queue fairly, but don't
- * allow the budget to go below 1 because that would exit polling early.
- */
- if (q_vector->num_ring_rx)
+ /* normally we have 1 Rx ring per q_vector */
+ if (unlikely(q_vector->num_ring_rx > 1))
+ /* We attempt to distribute budget to each Rx queue fairly, but
+ * don't allow the budget to go below 1 because that would exit
+ * polling early.
+ */
budget_per_ring = max(budget / q_vector->num_ring_rx, 1);
+ else
+ /* Max of 1 Rx ring in this q_vector so give it the budget */
+ budget_per_ring = budget;
ice_for_each_ring(ring, q_vector->rx) {
int cleaned;
@@ -1450,8 +1520,9 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
* poll us due to busy-polling
*/
if (likely(napi_complete_done(napi, work_done)))
- if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
- ice_update_ena_itr(vsi, q_vector);
+ ice_update_ena_itr(q_vector);
+ else
+ ice_set_wb_on_itr(q_vector);
return min_t(int, work_done, budget - 1);
}
@@ -1521,7 +1592,7 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
{
u64 td_offset, td_tag, td_cmd;
u16 i = tx_ring->next_to_use;
- struct skb_frag_struct *frag;
+ skb_frag_t *frag;
unsigned int data_len, size;
struct ice_tx_desc *tx_desc;
struct ice_tx_buf *tx_buf;
@@ -1923,7 +1994,7 @@ static unsigned int ice_txd_use_count(unsigned int size)
*/
static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
{
- const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+ const skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
unsigned int count = 0, size = skb_headlen(skb);
@@ -1954,7 +2025,7 @@ static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
*/
static bool __ice_chk_linearize(struct sk_buff *skb)
{
- const struct skb_frag_struct *frag, *stale;
+ const skb_frag_t *frag, *stale;
int nr_frags, sum;
/* no need to check if number of frags is less than 7 */
@@ -2036,6 +2107,7 @@ static netdev_tx_t
ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
{
struct ice_tx_offload_params offload = { 0 };
+ struct ice_vsi *vsi = tx_ring->vsi;
struct ice_tx_buf *first;
unsigned int count;
int tso, csum;
@@ -2083,7 +2155,15 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
if (csum < 0)
goto out_drop;
- if (tso || offload.cd_tunnel_params) {
+ /* allow CONTROL frames egress from main VSI if FW LLDP disabled */
+ if (unlikely(skb->priority == TC_PRIO_CONTROL &&
+ vsi->type == ICE_VSI_PF &&
+ vsi->port_info->is_sw_lldp))
+ offload.cd_qw1 |= (u64)(ICE_TX_DESC_DTYPE_CTX |
+ ICE_TX_CTX_DESC_SWTCH_UPLINK <<
+ ICE_TXD_CTX_QW1_CMD_S);
+
+ if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
struct ice_tx_ctx_desc *cdesc;
int i = tx_ring->next_to_use;