summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-05-06 15:55:30 -0400
committerDavid S. Miller <davem@davemloft.net>2016-05-06 15:55:30 -0400
commit95aef7cecbc229e7d6dc26780a7d39e864dc1ed8 (patch)
tree1a77215f05888c4d1e98a4fc4c093fbfafa8bcc3 /drivers
parentb3b4663c973bf11ef19243fa4f1a544cbdc2fa8e (diff)
parent3949c4ac8cfa8ab3518a326c72eff1a2ff489bb9 (diff)
downloadlinux-95aef7cecbc229e7d6dc26780a7d39e864dc1ed8.tar.bz2
Merge branch '40GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/next-queue
Jeff Kirsher says: ==================== 40GbE Intel Wired LAN Driver Updates 2016-05-05 This series contains updates to i40e and i40evf. The theme behind this series is code reduction, yeah! Jesse provides most of the changes starting with a refactor of the interpretation of a tunnel which lets us start using the hardware's parsing. Removed the packet split receive routine and ancillary code in preparation for the Rx-refactor. The refactor of the receive routine, aligns the receive routine with the one in ixgbe which was highly optimized. The hardware supports a 16 byte descriptor for receive, but the driver was never using it in production. There was no performance benefit to the real driver of 16 byte descriptors, so drop a whole lot of complexity while getting rid of the code. Fixed a bug where while changing the number of descriptors using ethtool, the driver did not test the limits of the system memory before permanently assuming it would be able to get receive buffer memory. Mitch fixes a memory leak of one page each time the driver is opened by allocating the correct number of receive buffers and do not fiddle with next_to_use in the VF driver. Arnd Bergmann fixed a indentation issue by adding the appropriate curly braces in i40e_vc_config_promiscuous_mode_msg(). Julia Lawall fixed an issue found by Coccinelle, where i40e_client_ops structure can be const since it is never modified. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/infiniband/hw/i40iw/i40iw_main.c2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h11
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_client.h2
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c31
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c54
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c73
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c968
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.h69
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c5
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c930
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.h69
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf.h7
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c65
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c34
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c4
15 files changed, 1062 insertions, 1262 deletions
diff --git a/drivers/infiniband/hw/i40iw/i40iw_main.c b/drivers/infiniband/hw/i40iw/i40iw_main.c
index 90e5af21737e..e41fae2422ab 100644
--- a/drivers/infiniband/hw/i40iw/i40iw_main.c
+++ b/drivers/infiniband/hw/i40iw/i40iw_main.c
@@ -1863,7 +1863,7 @@ static enum i40iw_status_code i40iw_virtchnl_send(struct i40iw_sc_dev *dev,
}
/* client interface functions */
-static struct i40e_client_ops i40e_ops = {
+static const struct i40e_client_ops i40e_ops = {
.open = i40iw_open,
.close = i40iw_close,
.l2_param_change = i40iw_l2param_change,
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 00c473874f01..2a6a5d3dd874 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -101,7 +101,6 @@
#define I40E_PRIV_FLAGS_LINKPOLL_FLAG BIT(1)
#define I40E_PRIV_FLAGS_FD_ATR BIT(2)
#define I40E_PRIV_FLAGS_VEB_STATS BIT(3)
-#define I40E_PRIV_FLAGS_PS BIT(4)
#define I40E_PRIV_FLAGS_HW_ATR_EVICT BIT(5)
#define I40E_NVM_VERSION_LO_SHIFT 0
@@ -123,10 +122,7 @@
#define XSTRINGIFY(bar) STRINGIFY(bar)
#define I40E_RX_DESC(R, i) \
- ((ring_is_16byte_desc_enabled(R)) \
- ? (union i40e_32byte_rx_desc *) \
- (&(((union i40e_16byte_rx_desc *)((R)->desc))[i])) \
- : (&(((union i40e_32byte_rx_desc *)((R)->desc))[i])))
+ (&(((union i40e_32byte_rx_desc *)((R)->desc))[i]))
#define I40E_TX_DESC(R, i) \
(&(((struct i40e_tx_desc *)((R)->desc))[i]))
#define I40E_TX_CTXTDESC(R, i) \
@@ -320,8 +316,6 @@ struct i40e_pf {
#define I40E_FLAG_RX_CSUM_ENABLED BIT_ULL(1)
#define I40E_FLAG_MSI_ENABLED BIT_ULL(2)
#define I40E_FLAG_MSIX_ENABLED BIT_ULL(3)
-#define I40E_FLAG_RX_1BUF_ENABLED BIT_ULL(4)
-#define I40E_FLAG_RX_PS_ENABLED BIT_ULL(5)
#define I40E_FLAG_RSS_ENABLED BIT_ULL(6)
#define I40E_FLAG_VMDQ_ENABLED BIT_ULL(7)
#define I40E_FLAG_FDIR_REQUIRES_REINIT BIT_ULL(8)
@@ -330,7 +324,6 @@ struct i40e_pf {
#ifdef I40E_FCOE
#define I40E_FLAG_FCOE_ENABLED BIT_ULL(11)
#endif /* I40E_FCOE */
-#define I40E_FLAG_16BYTE_RX_DESC_ENABLED BIT_ULL(13)
#define I40E_FLAG_CLEAN_ADMINQ BIT_ULL(14)
#define I40E_FLAG_FILTER_SYNC BIT_ULL(15)
#define I40E_FLAG_SERVICE_CLIENT_REQUESTED BIT_ULL(16)
@@ -534,9 +527,7 @@ struct i40e_vsi {
u8 *rss_lut_user; /* User configured lookup table entries */
u16 max_frame;
- u16 rx_hdr_len;
u16 rx_buf_len;
- u8 dtype;
/* List of q_vectors allocated to this VSI */
struct i40e_q_vector **q_vectors;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h
index bf6b453d93a1..a4601d97fb24 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.h
@@ -217,7 +217,7 @@ struct i40e_client {
#define I40E_CLIENT_FLAGS_LAUNCH_ON_PROBE BIT(0)
#define I40E_TX_FLAGS_NOTIFY_OTHER_EVENTS BIT(2)
enum i40e_client_type type;
- struct i40e_client_ops *ops; /* client ops provided by the client */
+ const struct i40e_client_ops *ops; /* client ops provided by the client */
};
static inline bool i40e_client_is_registered(struct i40e_client *client)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 83dccf1792e7..e6af8c8d7019 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -268,13 +268,11 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
rx_ring->queue_index,
rx_ring->reg_idx);
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: rx_hdr_len = %d, rx_buf_len = %d, dtype = %d\n",
- i, rx_ring->rx_hdr_len,
- rx_ring->rx_buf_len,
- rx_ring->dtype);
+ " rx_rings[%i]: rx_buf_len = %d\n",
+ i, rx_ring->rx_buf_len);
dev_info(&pf->pdev->dev,
- " rx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
- i, ring_is_ps_enabled(rx_ring),
+ " rx_rings[%i]: next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
+ i,
rx_ring->next_to_use,
rx_ring->next_to_clean,
rx_ring->ring_active);
@@ -326,9 +324,6 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
tx_ring->queue_index,
tx_ring->reg_idx);
dev_info(&pf->pdev->dev,
- " tx_rings[%i]: dtype = %d\n",
- i, tx_ring->dtype);
- dev_info(&pf->pdev->dev,
" tx_rings[%i]: next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
i,
tx_ring->next_to_use,
@@ -365,8 +360,8 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
" work_limit = %d\n",
vsi->work_limit);
dev_info(&pf->pdev->dev,
- " max_frame = %d, rx_hdr_len = %d, rx_buf_len = %d dtype = %d\n",
- vsi->max_frame, vsi->rx_hdr_len, vsi->rx_buf_len, vsi->dtype);
+ " max_frame = %d, rx_buf_len = %d dtype = %d\n",
+ vsi->max_frame, vsi->rx_buf_len, 0);
dev_info(&pf->pdev->dev,
" num_q_vectors = %i, base_vector = %i\n",
vsi->num_q_vectors, vsi->base_vector);
@@ -591,13 +586,6 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n,
" d[%03x] = 0x%016llx 0x%016llx\n",
i, txd->buffer_addr,
txd->cmd_type_offset_bsz);
- } else if (sizeof(union i40e_rx_desc) ==
- sizeof(union i40e_16byte_rx_desc)) {
- rxd = I40E_RX_DESC(ring, i);
- dev_info(&pf->pdev->dev,
- " d[%03x] = 0x%016llx 0x%016llx\n",
- i, rxd->read.pkt_addr,
- rxd->read.hdr_addr);
} else {
rxd = I40E_RX_DESC(ring, i);
dev_info(&pf->pdev->dev,
@@ -619,13 +607,6 @@ static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n,
"vsi = %02i tx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n",
vsi_seid, ring_id, desc_n,
txd->buffer_addr, txd->cmd_type_offset_bsz);
- } else if (sizeof(union i40e_rx_desc) ==
- sizeof(union i40e_16byte_rx_desc)) {
- rxd = I40E_RX_DESC(ring, desc_n);
- dev_info(&pf->pdev->dev,
- "vsi = %02i rx ring = %02i d[%03x] = 0x%016llx 0x%016llx\n",
- vsi_seid, ring_id, desc_n,
- rxd->read.pkt_addr, rxd->read.hdr_addr);
} else {
rxd = I40E_RX_DESC(ring, desc_n);
dev_info(&pf->pdev->dev,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 8e56c43c4104..51a994d85870 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -235,7 +235,6 @@ static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = {
"LinkPolling",
"flow-director-atr",
"veb-stats",
- "packet-split",
"hw-atr-eviction",
};
@@ -1275,6 +1274,13 @@ static int i40e_set_ringparam(struct net_device *netdev,
}
for (i = 0; i < vsi->num_queue_pairs; i++) {
+ /* this is to allow wr32 to have something to write to
+ * during early allocation of Rx buffers
+ */
+ u32 __iomem faketail = 0;
+ struct i40e_ring *ring;
+ u16 unused;
+
/* clone ring and setup updated count */
rx_rings[i] = *vsi->rx_rings[i];
rx_rings[i].count = new_rx_count;
@@ -1283,12 +1289,22 @@ static int i40e_set_ringparam(struct net_device *netdev,
*/
rx_rings[i].desc = NULL;
rx_rings[i].rx_bi = NULL;
+ rx_rings[i].tail = (u8 __iomem *)&faketail;
err = i40e_setup_rx_descriptors(&rx_rings[i]);
+ if (err)
+ goto rx_unwind;
+
+ /* now allocate the Rx buffers to make sure the OS
+ * has enough memory, any failure here means abort
+ */
+ ring = &rx_rings[i];
+ unused = I40E_DESC_UNUSED(ring);
+ err = i40e_alloc_rx_buffers(ring, unused);
+rx_unwind:
if (err) {
- while (i) {
- i--;
+ do {
i40e_free_rx_resources(&rx_rings[i]);
- }
+ } while (i--);
kfree(rx_rings);
rx_rings = NULL;
@@ -1314,6 +1330,17 @@ static int i40e_set_ringparam(struct net_device *netdev,
if (rx_rings) {
for (i = 0; i < vsi->num_queue_pairs; i++) {
i40e_free_rx_resources(vsi->rx_rings[i]);
+ /* get the real tail offset */
+ rx_rings[i].tail = vsi->rx_rings[i]->tail;
+ /* this is to fake out the allocation routine
+ * into thinking it has to realloc everything
+ * but the recycling logic will let us re-use
+ * the buffers allocated above
+ */
+ rx_rings[i].next_to_use = 0;
+ rx_rings[i].next_to_clean = 0;
+ rx_rings[i].next_to_alloc = 0;
+ /* do a struct copy */
*vsi->rx_rings[i] = rx_rings[i];
}
kfree(rx_rings);
@@ -2829,8 +2856,6 @@ static u32 i40e_get_priv_flags(struct net_device *dev)
I40E_PRIV_FLAGS_FD_ATR : 0;
ret_flags |= pf->flags & I40E_FLAG_VEB_STATS_ENABLED ?
I40E_PRIV_FLAGS_VEB_STATS : 0;
- ret_flags |= pf->flags & I40E_FLAG_RX_PS_ENABLED ?
- I40E_PRIV_FLAGS_PS : 0;
ret_flags |= pf->auto_disable_flags & I40E_FLAG_HW_ATR_EVICT_CAPABLE ?
0 : I40E_PRIV_FLAGS_HW_ATR_EVICT;
@@ -2851,23 +2876,6 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
/* NOTE: MFP is not settable */
- /* allow the user to control the method of receive
- * buffer DMA, whether the packet is split at header
- * boundaries into two separate buffers. In some cases
- * one routine or the other will perform better.
- */
- if ((flags & I40E_PRIV_FLAGS_PS) &&
- !(pf->flags & I40E_FLAG_RX_PS_ENABLED)) {
- pf->flags |= I40E_FLAG_RX_PS_ENABLED;
- pf->flags &= ~I40E_FLAG_RX_1BUF_ENABLED;
- reset_required = true;
- } else if (!(flags & I40E_PRIV_FLAGS_PS) &&
- (pf->flags & I40E_FLAG_RX_PS_ENABLED)) {
- pf->flags &= ~I40E_FLAG_RX_PS_ENABLED;
- pf->flags |= I40E_FLAG_RX_1BUF_ENABLED;
- reset_required = true;
- }
-
if (flags & I40E_PRIV_FLAGS_LINKPOLL_FLAG)
pf->flags |= I40E_FLAG_LINK_POLLING_ENABLED;
else
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index f6da6b76e678..46a3a674c635 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -2855,34 +2855,21 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
memset(&rx_ctx, 0, sizeof(rx_ctx));
ring->rx_buf_len = vsi->rx_buf_len;
- ring->rx_hdr_len = vsi->rx_hdr_len;
rx_ctx.dbuff = ring->rx_buf_len >> I40E_RXQ_CTX_DBUFF_SHIFT;
- rx_ctx.hbuff = ring->rx_hdr_len >> I40E_RXQ_CTX_HBUFF_SHIFT;
rx_ctx.base = (ring->dma / 128);
rx_ctx.qlen = ring->count;
- if (vsi->back->flags & I40E_FLAG_16BYTE_RX_DESC_ENABLED) {
- set_ring_16byte_desc_enabled(ring);
- rx_ctx.dsize = 0;
- } else {
- rx_ctx.dsize = 1;
- }
+ /* use 32 byte descriptors */
+ rx_ctx.dsize = 1;
- rx_ctx.dtype = vsi->dtype;
- if (vsi->dtype) {
- set_ring_ps_enabled(ring);
- rx_ctx.hsplit_0 = I40E_RX_SPLIT_L2 |
- I40E_RX_SPLIT_IP |
- I40E_RX_SPLIT_TCP_UDP |
- I40E_RX_SPLIT_SCTP;
- } else {
- rx_ctx.hsplit_0 = 0;
- }
+ /* descriptor type is always zero
+ * rx_ctx.dtype = 0;
+ */
+ rx_ctx.hsplit_0 = 0;
- rx_ctx.rxmax = min_t(u16, vsi->max_frame,
- (chain_len * ring->rx_buf_len));
+ rx_ctx.rxmax = min_t(u16, vsi->max_frame, chain_len * ring->rx_buf_len);
if (hw->revision_id == 0)
rx_ctx.lrxqthresh = 0;
else
@@ -2919,12 +2906,7 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
writel(0, ring->tail);
- if (ring_is_ps_enabled(ring)) {
- i40e_alloc_rx_headers(ring);
- i40e_alloc_rx_buffers_ps(ring, I40E_DESC_UNUSED(ring));
- } else {
- i40e_alloc_rx_buffers_1buf(ring, I40E_DESC_UNUSED(ring));
- }
+ i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
return 0;
}
@@ -2963,40 +2945,18 @@ static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
else
vsi->max_frame = I40E_RXBUFFER_2048;
- /* figure out correct receive buffer length */
- switch (vsi->back->flags & (I40E_FLAG_RX_1BUF_ENABLED |
- I40E_FLAG_RX_PS_ENABLED)) {
- case I40E_FLAG_RX_1BUF_ENABLED:
- vsi->rx_hdr_len = 0;
- vsi->rx_buf_len = vsi->max_frame;
- vsi->dtype = I40E_RX_DTYPE_NO_SPLIT;
- break;
- case I40E_FLAG_RX_PS_ENABLED:
- vsi->rx_hdr_len = I40E_RX_HDR_SIZE;
- vsi->rx_buf_len = I40E_RXBUFFER_2048;
- vsi->dtype = I40E_RX_DTYPE_HEADER_SPLIT;
- break;
- default:
- vsi->rx_hdr_len = I40E_RX_HDR_SIZE;
- vsi->rx_buf_len = I40E_RXBUFFER_2048;
- vsi->dtype = I40E_RX_DTYPE_SPLIT_ALWAYS;
- break;
- }
+ vsi->rx_buf_len = I40E_RXBUFFER_2048;
#ifdef I40E_FCOE
/* setup rx buffer for FCoE */
if ((vsi->type == I40E_VSI_FCOE) &&
(vsi->back->flags & I40E_FLAG_FCOE_ENABLED)) {
- vsi->rx_hdr_len = 0;
vsi->rx_buf_len = I40E_RXBUFFER_3072;
vsi->max_frame = I40E_RXBUFFER_3072;
- vsi->dtype = I40E_RX_DTYPE_NO_SPLIT;
}
#endif /* I40E_FCOE */
/* round up for the chip's needs */
- vsi->rx_hdr_len = ALIGN(vsi->rx_hdr_len,
- BIT_ULL(I40E_RXQ_CTX_HBUFF_SHIFT));
vsi->rx_buf_len = ALIGN(vsi->rx_buf_len,
BIT_ULL(I40E_RXQ_CTX_DBUFF_SHIFT));
@@ -7512,10 +7472,6 @@ static int i40e_alloc_rings(struct i40e_vsi *vsi)
rx_ring->count = vsi->num_desc;
rx_ring->size = 0;
rx_ring->dcb_tc = 0;
- if (pf->flags & I40E_FLAG_16BYTE_RX_DESC_ENABLED)
- set_ring_16byte_desc_enabled(rx_ring);
- else
- clear_ring_16byte_desc_enabled(rx_ring);
rx_ring->rx_itr_setting = pf->rx_itr_default;
vsi->rx_rings[i] = rx_ring;
}
@@ -8460,11 +8416,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
I40E_FLAG_MSI_ENABLED |
I40E_FLAG_MSIX_ENABLED;
- if (iommu_present(&pci_bus_type))
- pf->flags |= I40E_FLAG_RX_PS_ENABLED;
- else
- pf->flags |= I40E_FLAG_RX_1BUF_ENABLED;
-
/* Set default ITR */
pf->rx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF;
pf->tx_itr_default = I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF;
@@ -10696,11 +10647,9 @@ static void i40e_print_features(struct i40e_pf *pf)
#ifdef CONFIG_PCI_IOV
i += snprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs);
#endif
- i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d RX: %s",
+ i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d",
pf->hw.func_caps.num_vsis,
- pf->vsi[pf->lan_vsi]->num_queue_pairs,
- pf->flags & I40E_FLAG_RX_PS_ENABLED ? "PS" : "1BUF");
-
+ pf->vsi[pf->lan_vsi]->num_queue_pairs);
if (pf->flags & I40E_FLAG_RSS_ENABLED)
i += snprintf(&buf[i], REMAIN(i), " RSS");
if (pf->flags & I40E_FLAG_FD_ATR_ENABLED)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 2765d7efdd9c..b0edffe88492 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1024,7 +1024,6 @@ err:
void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
{
struct device *dev = rx_ring->dev;
- struct i40e_rx_buffer *rx_bi;
unsigned long bi_size;
u16 i;
@@ -1032,48 +1031,22 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
if (!rx_ring->rx_bi)
return;
- if (ring_is_ps_enabled(rx_ring)) {
- int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
-
- rx_bi = &rx_ring->rx_bi[0];
- if (rx_bi->hdr_buf) {
- dma_free_coherent(dev,
- bufsz,
- rx_bi->hdr_buf,
- rx_bi->dma);
- for (i = 0; i < rx_ring->count; i++) {
- rx_bi = &rx_ring->rx_bi[i];
- rx_bi->dma = 0;
- rx_bi->hdr_buf = NULL;
- }
- }
- }
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
- rx_bi = &rx_ring->rx_bi[i];
- if (rx_bi->dma) {
- dma_unmap_single(dev,
- rx_bi->dma,
- rx_ring->rx_buf_len,
- DMA_FROM_DEVICE);
- rx_bi->dma = 0;
- }
+ struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
+
if (rx_bi->skb) {
dev_kfree_skb(rx_bi->skb);
rx_bi->skb = NULL;
}
- if (rx_bi->page) {
- if (rx_bi->page_dma) {
- dma_unmap_page(dev,
- rx_bi->page_dma,
- PAGE_SIZE,
- DMA_FROM_DEVICE);
- rx_bi->page_dma = 0;
- }
- __free_page(rx_bi->page);
- rx_bi->page = NULL;
- rx_bi->page_offset = 0;
- }
+ if (!rx_bi->page)
+ continue;
+
+ dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ __free_pages(rx_bi->page, 0);
+
+ rx_bi->page = NULL;
+ rx_bi->page_offset = 0;
}
bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
@@ -1082,6 +1055,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
/* Zero out the descriptor ring */
memset(rx_ring->desc, 0, rx_ring->size);
+ rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
}
@@ -1106,37 +1080,6 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
}
/**
- * i40e_alloc_rx_headers - allocate rx header buffers
- * @rx_ring: ring to alloc buffers
- *
- * Allocate rx header buffers for the entire ring. As these are static,
- * this is only called when setting up a new ring.
- **/
-void i40e_alloc_rx_headers(struct i40e_ring *rx_ring)
-{
- struct device *dev = rx_ring->dev;
- struct i40e_rx_buffer *rx_bi;
- dma_addr_t dma;
- void *buffer;
- int buf_size;
- int i;
-
- if (rx_ring->rx_bi[0].hdr_buf)
- return;
- /* Make sure the buffers don't cross cache line boundaries. */
- buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
- buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
- &dma, GFP_KERNEL);
- if (!buffer)
- return;
- for (i = 0; i < rx_ring->count; i++) {
- rx_bi = &rx_ring->rx_bi[i];
- rx_bi->dma = dma + (i * buf_size);
- rx_bi->hdr_buf = buffer + (i * buf_size);
- }
-}
-
-/**
* i40e_setup_rx_descriptors - Allocate Rx descriptors
* @rx_ring: Rx descriptor ring (for a specific queue) to setup
*
@@ -1157,9 +1100,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
u64_stats_init(&rx_ring->syncp);
/* Round up to nearest 4K */
- rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
- ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
- : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
+ rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
rx_ring->size = ALIGN(rx_ring->size, 4096);
rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
&rx_ring->dma, GFP_KERNEL);
@@ -1170,6 +1111,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
goto err;
}
+ rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
@@ -1188,6 +1130,10 @@ err:
static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
rx_ring->next_to_use = val;
+
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = val;
+
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
@@ -1198,160 +1144,122 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
}
/**
- * i40e_alloc_rx_buffers_ps - Replace used receive buffers; packet split
- * @rx_ring: ring to place buffers on
- * @cleaned_count: number of buffers to replace
+ * i40e_alloc_mapped_page - recycle or make a new page
+ * @rx_ring: ring to use
+ * @bi: rx_buffer struct to modify
*
- * Returns true if any errors on allocation
+ * Returns true if the page was successfully allocated or
+ * reused.
**/
-bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
+static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
+ struct i40e_rx_buffer *bi)
{
- u16 i = rx_ring->next_to_use;
- union i40e_rx_desc *rx_desc;
- struct i40e_rx_buffer *bi;
- const int current_node = numa_node_id();
+ struct page *page = bi->page;
+ dma_addr_t dma;
- /* do nothing if no valid netdev defined */
- if (!rx_ring->netdev || !cleaned_count)
- return false;
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page)) {
+ rx_ring->rx_stats.page_reuse_count++;
+ return true;
+ }
- while (cleaned_count--) {
- rx_desc = I40E_RX_DESC(rx_ring, i);
- bi = &rx_ring->rx_bi[i];
+ /* alloc new page for storage */
+ page = dev_alloc_page();
+ if (unlikely(!page)) {
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
- if (bi->skb) /* desc is in use */
- goto no_buffers;
+ /* map page for use */
+ dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
- /* If we've been moved to a different NUMA node, release the
- * page so we can get a new one on the current node.
+ /* if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
*/
- if (bi->page && page_to_nid(bi->page) != current_node) {
- dma_unmap_page(rx_ring->dev,
- bi->page_dma,
- PAGE_SIZE,
- DMA_FROM_DEVICE);
- __free_page(bi->page);
- bi->page = NULL;
- bi->page_dma = 0;
- rx_ring->rx_stats.realloc_count++;
- } else if (bi->page) {
- rx_ring->rx_stats.page_reuse_count++;
- }
-
- if (!bi->page) {
- bi->page = alloc_page(GFP_ATOMIC);
- if (!bi->page) {
- rx_ring->rx_stats.alloc_page_failed++;
- goto no_buffers;
- }
- bi->page_dma = dma_map_page(rx_ring->dev,
- bi->page,
- 0,
- PAGE_SIZE,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(rx_ring->dev, bi->page_dma)) {
- rx_ring->rx_stats.alloc_page_failed++;
- __free_page(bi->page);
- bi->page = NULL;
- bi->page_dma = 0;
- bi->page_offset = 0;
- goto no_buffers;
- }
- bi->page_offset = 0;
- }
-
- /* Refresh the desc even if buffer_addrs didn't change
- * because each write-back erases this info.
- */
- rx_desc->read.pkt_addr =
- cpu_to_le64(bi->page_dma + bi->page_offset);
- rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
- i++;
- if (i == rx_ring->count)
- i = 0;
+ if (dma_mapping_error(rx_ring->dev, dma)) {
+ __free_pages(page, 0);
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
}
- if (rx_ring->next_to_use != i)
- i40e_release_rx_desc(rx_ring, i);
+ bi->dma = dma;
+ bi->page = page;
+ bi->page_offset = 0;
- return false;
+ return true;
+}
-no_buffers:
- if (rx_ring->next_to_use != i)
- i40e_release_rx_desc(rx_ring, i);
+/**
+ * i40e_receive_skb - Send a completed packet up the stack
+ * @rx_ring: rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: vlan tag for packet
+ **/
+static void i40e_receive_skb(struct i40e_ring *rx_ring,
+ struct sk_buff *skb, u16 vlan_tag)
+{
+ struct i40e_q_vector *q_vector = rx_ring->q_vector;
- /* make sure to come back via polling to try again after
- * allocation failure
- */
- return true;
+ if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ (vlan_tag & VLAN_VID_MASK))
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+
+ napi_gro_receive(&q_vector->napi, skb);
}
/**
- * i40e_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
+ * i40e_alloc_rx_buffers - Replace used receive buffers
* @rx_ring: ring to place buffers on
* @cleaned_count: number of buffers to replace
*
- * Returns true if any errors on allocation
+ * Returns false if all allocations were successful, true if any fail
**/
-bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
+bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
{
- u16 i = rx_ring->next_to_use;
+ u16 ntu = rx_ring->next_to_use;
union i40e_rx_desc *rx_desc;
struct i40e_rx_buffer *bi;
- struct sk_buff *skb;
/* do nothing if no valid netdev defined */
if (!rx_ring->netdev || !cleaned_count)
return false;
- while (cleaned_count--) {
- rx_desc = I40E_RX_DESC(rx_ring, i);
- bi = &rx_ring->rx_bi[i];
- skb = bi->skb;
-
- if (!skb) {
- skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
- rx_ring->rx_buf_len,
- GFP_ATOMIC |
- __GFP_NOWARN);
- if (!skb) {
- rx_ring->rx_stats.alloc_buff_failed++;
- goto no_buffers;
- }
- /* initialize queue mapping */
- skb_record_rx_queue(skb, rx_ring->queue_index);
- bi->skb = skb;
- }
+ rx_desc = I40E_RX_DESC(rx_ring, ntu);
+ bi = &rx_ring->rx_bi[ntu];
- if (!bi->dma) {
- bi->dma = dma_map_single(rx_ring->dev,
- skb->data,
- rx_ring->rx_buf_len,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(rx_ring->dev, bi->dma)) {
- rx_ring->rx_stats.alloc_buff_failed++;
- bi->dma = 0;
- dev_kfree_skb(bi->skb);
- bi->skb = NULL;
- goto no_buffers;
- }
- }
+ do {
+ if (!i40e_alloc_mapped_page(rx_ring, bi))
+ goto no_buffers;
- rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+ /* Refresh the desc even if buffer_addrs didn't change
+ * because each write-back erases this info.
+ */
+ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
rx_desc->read.hdr_addr = 0;
- i++;
- if (i == rx_ring->count)
- i = 0;
- }
- if (rx_ring->next_to_use != i)
- i40e_release_rx_desc(rx_ring, i);
+ rx_desc++;
+ bi++;
+ ntu++;
+ if (unlikely(ntu == rx_ring->count)) {
+ rx_desc = I40E_RX_DESC(rx_ring, 0);
+ bi = rx_ring->rx_bi;
+ ntu = 0;
+ }
+
+ /* clear the status bits for the next_to_use descriptor */
+ rx_desc->wb.qword1.status_error_len = 0;
+
+ cleaned_count--;
+ } while (cleaned_count);
+
+ if (rx_ring->next_to_use != ntu)
+ i40e_release_rx_desc(rx_ring, ntu);
return false;
no_buffers:
- if (rx_ring->next_to_use != i)
- i40e_release_rx_desc(rx_ring, i);
+ if (rx_ring->next_to_use != ntu)
+ i40e_release_rx_desc(rx_ring, ntu);
/* make sure to come back via polling to try again after
* allocation failure
@@ -1360,42 +1268,35 @@ no_buffers:
}
/**
- * i40e_receive_skb - Send a completed packet up the stack
- * @rx_ring: rx ring in play
- * @skb: packet to send up
- * @vlan_tag: vlan tag for packet
- **/
-static void i40e_receive_skb(struct i40e_ring *rx_ring,
- struct sk_buff *skb, u16 vlan_tag)
-{
- struct i40e_q_vector *q_vector = rx_ring->q_vector;
-
- if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
- (vlan_tag & VLAN_VID_MASK))
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
-
- napi_gro_receive(&q_vector->napi, skb);
-}
-
-/**
* i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
* @vsi: the VSI we care about
* @skb: skb currently being received and modified
- * @rx_status: status value of last descriptor in packet
- * @rx_error: error value of last descriptor in packet
- * @rx_ptype: ptype value of last descriptor in packet
+ * @rx_desc: the receive descriptor
+ *
+ * skb->protocol must be set before this function is called
**/
static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
struct sk_buff *skb,
- u32 rx_status,
- u32 rx_error,
- u16 rx_ptype)
+ union i40e_rx_desc *rx_desc)
{
- struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
- bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel;
+ struct i40e_rx_ptype_decoded decoded;
+ bool ipv4, ipv6, tunnel = false;
+ u32 rx_error, rx_status;
+ u8 ptype;
+ u64 qword;
+
+ qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+ ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
+ rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
+ I40E_RXD_QW1_ERROR_SHIFT;
+ rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
+ I40E_RXD_QW1_STATUS_SHIFT;
+ decoded = decode_rx_desc_ptype(ptype);
skb->ip_summed = CHECKSUM_NONE;
+ skb_checksum_none_assert(skb);
+
/* Rx csum enabled and ip headers found? */
if (!(vsi->netdev->features & NETIF_F_RXCSUM))
return;
@@ -1441,14 +1342,13 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
* doesn't make it a hard requirement so if we have validated the
* inner checksum report CHECKSUM_UNNECESSARY.
*/
-
- ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
- (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
- ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
- (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
+ if (decoded.inner_prot & (I40E_RX_PTYPE_INNER_PROT_TCP |
+ I40E_RX_PTYPE_INNER_PROT_UDP |
+ I40E_RX_PTYPE_INNER_PROT_SCTP))
+ tunnel = true;
skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = ipv4_tunnel || ipv6_tunnel;
+ skb->csum_level = tunnel ? 1 : 0;
return;
@@ -1462,7 +1362,7 @@ checksum_fail:
*
* Returns a hash type to be used by skb_set_hash
**/
-static inline enum pkt_hash_types i40e_ptype_to_htype(u8 ptype)
+static inline int i40e_ptype_to_htype(u8 ptype)
{
struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
@@ -1490,7 +1390,7 @@ static inline void i40e_rx_hash(struct i40e_ring *ring,
u8 rx_ptype)
{
u32 hash;
- const __le64 rss_mask =
+ const __le64 rss_mask =
cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
@@ -1504,338 +1404,419 @@ static inline void i40e_rx_hash(struct i40e_ring *ring,
}
/**
- * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
- * @rx_ring: rx ring to clean
- * @budget: how many cleans we're allowed
+ * i40e_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @rx_ptype: the packet type decoded by hardware
*
- * Returns true if there's any budget left (e.g. the clean is finished)
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
**/
-static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
+static inline
+void i40e_process_skb_fields(struct i40e_ring *rx_ring,
+ union i40e_rx_desc *rx_desc, struct sk_buff *skb,
+ u8 rx_ptype)
{
- unsigned int total_rx_bytes = 0, total_rx_packets = 0;
- u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
- u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
- struct i40e_vsi *vsi = rx_ring->vsi;
- u16 i = rx_ring->next_to_clean;
- union i40e_rx_desc *rx_desc;
- u32 rx_error, rx_status;
- bool failure = false;
- u8 rx_ptype;
- u64 qword;
- u32 copysize;
+ u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+ u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
+ I40E_RXD_QW1_STATUS_SHIFT;
+ u32 rsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
+ I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT;
- if (budget <= 0)
- return 0;
+ if (unlikely(rsyn)) {
+ i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, rsyn);
+ rx_ring->last_rx_timestamp = jiffies;
+ }
- do {
- struct i40e_rx_buffer *rx_bi;
- struct sk_buff *skb;
- u16 vlan_tag;
- /* return some buffers to hardware, one at a time is too slow */
- if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
- failure = failure ||
- i40e_alloc_rx_buffers_ps(rx_ring,
- cleaned_count);
- cleaned_count = 0;
- }
+ i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
- i = rx_ring->next_to_clean;
- rx_desc = I40E_RX_DESC(rx_ring, i);
- qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
- I40E_RXD_QW1_STATUS_SHIFT;
+ /* modifies the skb - consumes the enet header */
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
- if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
- break;
+ i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
- /* This memory barrier is needed to keep us from reading
- * any other fields out of the rx_desc until we know the
- * DD bit is set.
- */
- dma_rmb();
- /* sync header buffer for reading */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_ring->rx_bi[0].dma,
- i * rx_ring->rx_hdr_len,
- rx_ring->rx_hdr_len,
- DMA_FROM_DEVICE);
- if (i40e_rx_is_programming_status(qword)) {
- i40e_clean_programming_status(rx_ring, rx_desc);
- I40E_RX_INCREMENT(rx_ring, i);
- continue;
- }
- rx_bi = &rx_ring->rx_bi[i];
- skb = rx_bi->skb;
- if (likely(!skb)) {
- skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
- rx_ring->rx_hdr_len,
- GFP_ATOMIC |
- __GFP_NOWARN);
- if (!skb) {
- rx_ring->rx_stats.alloc_buff_failed++;
- failure = true;
- break;
- }
+ skb_record_rx_queue(skb, rx_ring->queue_index);
+}
- /* initialize queue mapping */
- skb_record_rx_queue(skb, rx_ring->queue_index);
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_ring->rx_bi[0].dma,
- i * rx_ring->rx_hdr_len,
- rx_ring->rx_hdr_len,
- DMA_FROM_DEVICE);
- }
- rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
- I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
- rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
- I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
- rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
- I40E_RXD_QW1_LENGTH_SPH_SHIFT;
-
- rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
- I40E_RXD_QW1_ERROR_SHIFT;
- rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
- rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
+/**
+ * i40e_pull_tail - i40e specific version of skb_pull_tail
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an i40e specific version of __pskb_pull_tail. The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb)
+{
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+ unsigned char *va;
+ unsigned int pull_len;
- rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
- I40E_RXD_QW1_PTYPE_SHIFT;
- /* sync half-page for reading */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_bi->page_dma,
- rx_bi->page_offset,
- PAGE_SIZE / 2,
- DMA_FROM_DEVICE);
- prefetch(page_address(rx_bi->page) + rx_bi->page_offset);
- rx_bi->skb = NULL;
- cleaned_count++;
- copysize = 0;
- if (rx_hbo || rx_sph) {
- int len;
+ /* it is valid to use page_address instead of kmap since we are
+ * working with pages allocated out of the lomem pool per
+ * alloc_page(GFP_ATOMIC)
+ */
+ va = skb_frag_address(frag);
- if (rx_hbo)
- len = I40E_RX_HDR_SIZE;
- else
- len = rx_header_len;
- memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
- } else if (skb->len == 0) {
- int len;
- unsigned char *va = page_address(rx_bi->page) +
- rx_bi->page_offset;
-
- len = min(rx_packet_len, rx_ring->rx_hdr_len);
- memcpy(__skb_put(skb, len), va, len);
- copysize = len;
- rx_packet_len -= len;
- }
- /* Get the rest of the data if this was a header split */
- if (rx_packet_len) {
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
- rx_bi->page,
- rx_bi->page_offset + copysize,
- rx_packet_len, I40E_RXBUFFER_2048);
-
- /* If the page count is more than 2, then both halves
- * of the page are used and we need to free it. Do it
- * here instead of in the alloc code. Otherwise one
- * of the half-pages might be released between now and
- * then, and we wouldn't know which one to use.
- * Don't call get_page and free_page since those are
- * both expensive atomic operations that just change
- * the refcount in opposite directions. Just give the
- * page to the stack; he can have our refcount.
- */
- if (page_count(rx_bi->page) > 2) {
- dma_unmap_page(rx_ring->dev,
- rx_bi->page_dma,
- PAGE_SIZE,
- DMA_FROM_DEVICE);
- rx_bi->page = NULL;
- rx_bi->page_dma = 0;
- rx_ring->rx_stats.realloc_count++;
- } else {
- get_page(rx_bi->page);
- /* switch to the other half-page here; the
- * allocation code programs the right addr
- * into HW. If we haven't used this half-page,
- * the address won't be changed, and HW can
- * just use it next time through.
- */
- rx_bi->page_offset ^= PAGE_SIZE / 2;
- }
+ /* we need the header to contain the greater of either ETH_HLEN or
+ * 60 bytes if the skb->len is less than 60 for skb_pad.
+ */
+ pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);
- }
- I40E_RX_INCREMENT(rx_ring, i);
+ /* align pull length to size of long to optimize memcpy performance */
+ skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
- if (unlikely(
- !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
- struct i40e_rx_buffer *next_buffer;
+ /* update all of the pointers */
+ skb_frag_size_sub(frag, pull_len);
+ frag->page_offset += pull_len;
+ skb->data_len -= pull_len;
+ skb->tail += pull_len;
+}
- next_buffer = &rx_ring->rx_bi[i];
- next_buffer->skb = skb;
- rx_ring->rx_stats.non_eop_descs++;
- continue;
- }
+/**
+ * i40e_cleanup_headers - Correct empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being fixed
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
+{
+ /* place header in linear portion of buffer */
+ if (skb_is_nonlinear(skb))
+ i40e_pull_tail(rx_ring, skb);
- /* ERR_MASK will only have valid bits if EOP set */
- if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
- dev_kfree_skb_any(skb);
- continue;
- }
+ /* if eth_skb_pad returns an error the skb was freed */
+ if (eth_skb_pad(skb))
+ return true;
- i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
+ return false;
+}
- if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
- i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
- I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
- I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
- rx_ring->last_rx_timestamp = jiffies;
- }
+/**
+ * i40e_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
+ struct i40e_rx_buffer *old_buff)
+{
+ struct i40e_rx_buffer *new_buff;
+ u16 nta = rx_ring->next_to_alloc;
- /* probably a little skewed due to removing CRC */
- total_rx_bytes += skb->len;
- total_rx_packets++;
+ new_buff = &rx_ring->rx_bi[nta];
- skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
- i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
+ /* transfer page from old buffer to new buffer */
+ *new_buff = *old_buff;
+}
- vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
- ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
- : 0;
-#ifdef I40E_FCOE
- if (unlikely(
- i40e_rx_is_fcoe(rx_ptype) &&
- !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
- dev_kfree_skb_any(skb);
- continue;
- }
+/**
+ * i40e_page_is_reserved - check if reuse is possible
+ * @page: page struct to check
+ */
+static inline bool i40e_page_is_reserved(struct page *page)
+{
+ return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+}
+
+/**
+ * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ **/
+static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
+ struct i40e_rx_buffer *rx_buffer,
+ union i40e_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct page *page = rx_buffer->page;
+ u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+ unsigned int size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
+ I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = I40E_RXBUFFER_2048;
+#else
+ unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+ unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
#endif
- i40e_receive_skb(rx_ring, skb, vlan_tag);
- rx_desc->wb.qword1.status_error_len = 0;
+ /* will the data fit in the skb we allocated? if so, just
+ * copy it as it is pretty small anyway
+ */
+ if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) {
+ unsigned char *va = page_address(page) + rx_buffer->page_offset;
- } while (likely(total_rx_packets < budget));
+ memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->stats.packets += total_rx_packets;
- rx_ring->stats.bytes += total_rx_bytes;
- u64_stats_update_end(&rx_ring->syncp);
- rx_ring->q_vector->rx.total_packets += total_rx_packets;
- rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+ /* page is not reserved, we can reuse buffer as-is */
+ if (likely(!i40e_page_is_reserved(page)))
+ return true;
- return failure ? budget : total_rx_packets;
+ /* this page cannot be reused so discard it */
+ __free_pages(page, 0);
+ return false;
+ }
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ rx_buffer->page_offset, size, truesize);
+
+ /* avoid re-using remote pages */
+ if (unlikely(i40e_page_is_reserved(page)))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely(page_count(page) != 1))
+ return false;
+
+ /* flip page offset to other buffer */
+ rx_buffer->page_offset ^= truesize;
+#else
+ /* move offset up to the next cache line */
+ rx_buffer->page_offset += truesize;
+
+ if (rx_buffer->page_offset > last_offset)
+ return false;
+#endif
+
+ /* Even if we own the page, we are not allowed to use atomic_set()
+ * This would break get_page_unless_zero() users.
+ */
+ get_page(rx_buffer->page);
+
+ return true;
}
/**
- * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
- * @rx_ring: rx ring to clean
- * @budget: how many cleans we're allowed
+ * i40e_fetch_rx_buffer - Allocate skb and populate it
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_desc: descriptor containing info written by hardware
*
- * Returns number of packets cleaned
+ * This function allocates an skb on the fly, and populates it with the page
+ * data from the current receive descriptor, taking care to set up the skb
+ * correctly, as well as handling calling the page recycle function if
+ * necessary.
+ */
+static inline
+struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring,
+ union i40e_rx_desc *rx_desc)
+{
+ struct i40e_rx_buffer *rx_buffer;
+ struct sk_buff *skb;
+ struct page *page;
+
+ rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
+ page = rx_buffer->page;
+ prefetchw(page);
+
+ skb = rx_buffer->skb;
+
+ if (likely(!skb)) {
+ void *page_addr = page_address(page) + rx_buffer->page_offset;
+
+ /* prefetch first cache line of first page */
+ prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+ prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+ /* allocate a skb to store the frags */
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+ I40E_RX_HDR_SIZE,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb)) {
+ rx_ring->rx_stats.alloc_buff_failed++;
+ return NULL;
+ }
+
+ /* we will be copying header into skb->data in
+ * pskb_may_pull so it is in our interest to prefetch
+ * it now to avoid a possible cache miss
+ */
+ prefetchw(skb->data);
+ } else {
+ rx_buffer->skb = NULL;
+ }
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ I40E_RXBUFFER_2048,
+ DMA_FROM_DEVICE);
+
+ /* pull page into skb */
+ if (i40e_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+ /* hand second half of page back to the ring */
+ i40e_reuse_rx_page(rx_ring, rx_buffer);
+ rx_ring->rx_stats.page_reuse_count++;
+ } else {
+ /* we are not reusing the buffer so unmap it */
+ dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ }
+
+ /* clear contents of buffer_info */
+ rx_buffer->page = NULL;
+
+ return skb;
+}
+
+/**
+ * i40e_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ *
+ * This function updates next to clean. If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
+ union i40e_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
+ prefetch(I40E_RX_DESC(rx_ring, ntc));
+
+#define staterrlen rx_desc->wb.qword1.status_error_len
+ if (unlikely(i40e_rx_is_programming_status(le64_to_cpu(staterrlen)))) {
+ i40e_clean_programming_status(rx_ring, rx_desc);
+ rx_ring->rx_bi[ntc].skb = skb;
+ return true;
+ }
+ /* if we are the last buffer then there is nothing else to do */
+#define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)
+ if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF)))
+ return false;
+
+ /* place skb in next buffer to be received */
+ rx_ring->rx_bi[ntc].skb = skb;
+ rx_ring->rx_stats.non_eop_descs++;
+
+ return true;
+}
+
+/**
+ * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing. The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed
**/
-static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
+static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
- struct i40e_vsi *vsi = rx_ring->vsi;
- union i40e_rx_desc *rx_desc;
- u32 rx_error, rx_status;
- u16 rx_packet_len;
bool failure = false;
- u8 rx_ptype;
- u64 qword;
- u16 i;
- do {
- struct i40e_rx_buffer *rx_bi;
+ while (likely(total_rx_packets < budget)) {
+ union i40e_rx_desc *rx_desc;
struct sk_buff *skb;
+ u32 rx_status;
u16 vlan_tag;
+ u8 rx_ptype;
+ u64 qword;
+
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
failure = failure ||
- i40e_alloc_rx_buffers_1buf(rx_ring,
- cleaned_count);
+ i40e_alloc_rx_buffers(rx_ring, cleaned_count);
cleaned_count = 0;
}
- i = rx_ring->next_to_clean;
- rx_desc = I40E_RX_DESC(rx_ring, i);
+ rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+ rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
+ I40E_RXD_QW1_PTYPE_SHIFT;
rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
- I40E_RXD_QW1_STATUS_SHIFT;
+ I40E_RXD_QW1_STATUS_SHIFT;
if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
break;
+ /* status_error_len will always be zero for unused descriptors
+ * because it's cleared in cleanup, and overlaps with hdr_addr
+ * which is always zero because packet split isn't used, if the
+ * hardware wrote DD then it will be non-zero
+ */
+ if (!rx_desc->wb.qword1.status_error_len)
+ break;
+
/* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
* DD bit is set.
*/
dma_rmb();
- if (i40e_rx_is_programming_status(qword)) {
- i40e_clean_programming_status(rx_ring, rx_desc);
- I40E_RX_INCREMENT(rx_ring, i);
- continue;
- }
- rx_bi = &rx_ring->rx_bi[i];
- skb = rx_bi->skb;
- prefetch(skb->data);
-
- rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
- I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
-
- rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
- I40E_RXD_QW1_ERROR_SHIFT;
- rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
+ skb = i40e_fetch_rx_buffer(rx_ring, rx_desc);
+ if (!skb)
+ break;
- rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
- I40E_RXD_QW1_PTYPE_SHIFT;
- rx_bi->skb = NULL;
cleaned_count++;
- /* Get the header and possibly the whole packet
- * If this is an skb from previous receive dma will be 0
- */
- skb_put(skb, rx_packet_len);
- dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
- DMA_FROM_DEVICE);
- rx_bi->dma = 0;
-
- I40E_RX_INCREMENT(rx_ring, i);
-
- if (unlikely(
- !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
- rx_ring->rx_stats.non_eop_descs++;
+ if (i40e_is_non_eop(rx_ring, rx_desc, skb))
continue;
- }
- /* ERR_MASK will only have valid bits if EOP set */
- if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
+ /* ERR_MASK will only have valid bits if EOP set, and
+ * what we are doing here is actually checking
+ * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
+ * the error field
+ */
+ if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
dev_kfree_skb_any(skb);
continue;
}
- i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
- if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
- i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
- I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
- I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT);
- rx_ring->last_rx_timestamp = jiffies;
- }
+ if (i40e_cleanup_headers(rx_ring, skb))
+ continue;
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
- total_rx_packets++;
- skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+ /* populate checksum, VLAN, and protocol */
+ i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
- i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
-
- vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
- ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
- : 0;
#ifdef I40E_FCOE
if (unlikely(
i40e_rx_is_fcoe(rx_ptype) &&
@@ -1844,10 +1825,15 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
continue;
}
#endif
+
+ vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
+ le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
+
i40e_receive_skb(rx_ring, skb, vlan_tag);
- rx_desc->wb.qword1.status_error_len = 0;
- } while (likely(total_rx_packets < budget));
+ /* update budget accounting */
+ total_rx_packets++;
+ }
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->stats.packets += total_rx_packets;
@@ -1856,6 +1842,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
rx_ring->q_vector->rx.total_packets += total_rx_packets;
rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+ /* guarantee a trip back through this routine if there was a failure */
return failure ? budget : total_rx_packets;
}
@@ -2000,12 +1987,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
i40e_for_each_ring(ring, q_vector->rx) {
- int cleaned;
-
- if (ring_is_ps_enabled(ring))
- cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
- else
- cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
+ int cleaned = i40e_clean_rx_irq(ring, budget_per_ring);
work_done += cleaned;
/* if we clean as many as budgeted, we must not be done */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 6b2b1913527d..b78c810d1835 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -102,8 +102,8 @@ enum i40e_dyn_idx_t {
(((pf)->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) ? \
I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA)
-/* Supported Rx Buffer Sizes */
-#define I40E_RXBUFFER_512 512 /* Used for packet split */
+/* Supported Rx Buffer Sizes (a multiple of 128) */
+#define I40E_RXBUFFER_256 256
#define I40E_RXBUFFER_2048 2048
#define I40E_RXBUFFER_3072 3072 /* For FCoE MTU of 2158 */
#define I40E_RXBUFFER_4096 4096
@@ -114,9 +114,28 @@ enum i40e_dyn_idx_t {
* reserve 2 more, and skb_shared_info adds an additional 384 bytes more,
* this adds up to 512 bytes of extra data meaning the smallest allocation
* we could have is 1K.
- * i.e. RXBUFFER_512 --> size-1024 slab
+ * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab)
+ * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab)
*/
-#define I40E_RX_HDR_SIZE I40E_RXBUFFER_512
+#define I40E_RX_HDR_SIZE I40E_RXBUFFER_256
+#define i40e_rx_desc i40e_32byte_rx_desc
+
+/**
+ * i40e_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc,
+ const u64 stat_err_bits)
+{
+ return !!(rx_desc->wb.qword1.status_error_len &
+ cpu_to_le64(stat_err_bits));
+}
/* How many Rx Buffers do we bundle into one write to the hardware ? */
#define I40E_RX_BUFFER_WRITE 16 /* Must be power of 2 */
@@ -142,8 +161,6 @@ enum i40e_dyn_idx_t {
prefetch((n)); \
} while (0)
-#define i40e_rx_desc i40e_32byte_rx_desc
-
#define I40E_MAX_BUFFER_TXD 8
#define I40E_MIN_TX_LEN 17
@@ -213,10 +230,8 @@ struct i40e_tx_buffer {
struct i40e_rx_buffer {
struct sk_buff *skb;
- void *hdr_buf;
dma_addr_t dma;
struct page *page;
- dma_addr_t page_dma;
unsigned int page_offset;
};
@@ -245,22 +260,18 @@ struct i40e_rx_queue_stats {
enum i40e_ring_state_t {
__I40E_TX_FDIR_INIT_DONE,
__I40E_TX_XPS_INIT_DONE,
- __I40E_RX_PS_ENABLED,
- __I40E_RX_16BYTE_DESC_ENABLED,
};
-#define ring_is_ps_enabled(ring) \
- test_bit(__I40E_RX_PS_ENABLED, &(ring)->state)
-#define set_ring_ps_enabled(ring) \
- set_bit(__I40E_RX_PS_ENABLED, &(ring)->state)
-#define clear_ring_ps_enabled(ring) \
- clear_bit(__I40E_RX_PS_ENABLED, &(ring)->state)
-#define ring_is_16byte_desc_enabled(ring) \
- test_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state)
-#define set_ring_16byte_desc_enabled(ring) \
- set_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state)
-#define clear_ring_16byte_desc_enabled(ring) \
- clear_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state)
+/* some useful defines for virtchannel interface, which
+ * is the only remaining user of header split
+ */
+#define I40E_RX_DTYPE_NO_SPLIT 0
+#define I40E_RX_DTYPE_HEADER_SPLIT 1
+#define I40E_RX_DTYPE_SPLIT_ALWAYS 2
+#define I40E_RX_SPLIT_L2 0x1
+#define I40E_RX_SPLIT_IP 0x2
+#define I40E_RX_SPLIT_TCP_UDP 0x4
+#define I40E_RX_SPLIT_SCTP 0x8
/* struct that defines a descriptor ring, associated with a VSI */
struct i40e_ring {
@@ -287,16 +298,7 @@ struct i40e_ring {
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
- u16 rx_hdr_len;
u16 rx_buf_len;
- u8 dtype;
-#define I40E_RX_DTYPE_NO_SPLIT 0
-#define I40E_RX_DTYPE_HEADER_SPLIT 1
-#define I40E_RX_DTYPE_SPLIT_ALWAYS 2
-#define I40E_RX_SPLIT_L2 0x1
-#define I40E_RX_SPLIT_IP 0x2
-#define I40E_RX_SPLIT_TCP_UDP 0x4
-#define I40E_RX_SPLIT_SCTP 0x8
/* used in interrupt processing */
u16 next_to_use;
@@ -330,6 +332,7 @@ struct i40e_ring {
struct i40e_q_vector *q_vector; /* Backreference to associated vector */
struct rcu_head rcu; /* to avoid race on free */
+ u16 next_to_alloc;
} ____cacheline_internodealigned_in_smp;
enum i40e_latency_range {
@@ -353,9 +356,7 @@ struct i40e_ring_container {
#define i40e_for_each_ring(pos, head) \
for (pos = (head).ring; pos != NULL; pos = pos->next)
-bool i40e_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count);
-bool i40e_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count);
-void i40e_alloc_rx_headers(struct i40e_ring *rxr);
+bool i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
void i40e_clean_rx_ring(struct i40e_ring *rx_ring);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 6b9db7983693..a9b04e72df82 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -590,7 +590,7 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id,
}
rx_ctx.hbuff = info->hdr_size >> I40E_RXQ_CTX_HBUFF_SHIFT;
- /* set splitalways mode 10b */
+ /* set split mode 10b */
rx_ctx.dtype = I40E_RX_DTYPE_HEADER_SPLIT;
}
@@ -1544,7 +1544,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
} else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
list_for_each_entry(f, &vsi->mac_filter_list, list) {
aq_ret = 0;
- if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID)
+ if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) {
aq_ret =
i40e_aq_set_vsi_uc_promisc_on_vlan(hw,
vsi->seid,
@@ -1552,6 +1552,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
f->vlan,
NULL);
aq_err = pf->hw.aq.asq_last_status;
+ }
if (aq_ret)
dev_err(&pf->pdev->dev,
"Could not add VLAN %d to Unicast promiscuous domain err %s aq_err %s\n",
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index ede8dfc189bc..fd7dae46c5d8 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -496,7 +496,6 @@ err:
void i40evf_clean_rx_ring(struct i40e_ring *rx_ring)
{
struct device *dev = rx_ring->dev;
- struct i40e_rx_buffer *rx_bi;
unsigned long bi_size;
u16 i;
@@ -504,48 +503,22 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring)
if (!rx_ring->rx_bi)
return;
- if (ring_is_ps_enabled(rx_ring)) {
- int bufsz = ALIGN(rx_ring->rx_hdr_len, 256) * rx_ring->count;
-
- rx_bi = &rx_ring->rx_bi[0];
- if (rx_bi->hdr_buf) {
- dma_free_coherent(dev,
- bufsz,
- rx_bi->hdr_buf,
- rx_bi->dma);
- for (i = 0; i < rx_ring->count; i++) {
- rx_bi = &rx_ring->rx_bi[i];
- rx_bi->dma = 0;
- rx_bi->hdr_buf = NULL;
- }
- }
- }
/* Free all the Rx ring sk_buffs */
for (i = 0; i < rx_ring->count; i++) {
- rx_bi = &rx_ring->rx_bi[i];
- if (rx_bi->dma) {
- dma_unmap_single(dev,
- rx_bi->dma,
- rx_ring->rx_buf_len,
- DMA_FROM_DEVICE);
- rx_bi->dma = 0;
- }
+ struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
+
if (rx_bi->skb) {
dev_kfree_skb(rx_bi->skb);
rx_bi->skb = NULL;
}
- if (rx_bi->page) {
- if (rx_bi->page_dma) {
- dma_unmap_page(dev,
- rx_bi->page_dma,
- PAGE_SIZE,
- DMA_FROM_DEVICE);
- rx_bi->page_dma = 0;
- }
- __free_page(rx_bi->page);
- rx_bi->page = NULL;
- rx_bi->page_offset = 0;
- }
+ if (!rx_bi->page)
+ continue;
+
+ dma_unmap_page(dev, rx_bi->dma, PAGE_SIZE, DMA_FROM_DEVICE);
+ __free_pages(rx_bi->page, 0);
+
+ rx_bi->page = NULL;
+ rx_bi->page_offset = 0;
}
bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
@@ -554,6 +527,7 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring)
/* Zero out the descriptor ring */
memset(rx_ring->desc, 0, rx_ring->size);
+ rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
}
@@ -578,37 +552,6 @@ void i40evf_free_rx_resources(struct i40e_ring *rx_ring)
}
/**
- * i40evf_alloc_rx_headers - allocate rx header buffers
- * @rx_ring: ring to alloc buffers
- *
- * Allocate rx header buffers for the entire ring. As these are static,
- * this is only called when setting up a new ring.
- **/
-void i40evf_alloc_rx_headers(struct i40e_ring *rx_ring)
-{
- struct device *dev = rx_ring->dev;
- struct i40e_rx_buffer *rx_bi;
- dma_addr_t dma;
- void *buffer;
- int buf_size;
- int i;
-
- if (rx_ring->rx_bi[0].hdr_buf)
- return;
- /* Make sure the buffers don't cross cache line boundaries. */
- buf_size = ALIGN(rx_ring->rx_hdr_len, 256);
- buffer = dma_alloc_coherent(dev, buf_size * rx_ring->count,
- &dma, GFP_KERNEL);
- if (!buffer)
- return;
- for (i = 0; i < rx_ring->count; i++) {
- rx_bi = &rx_ring->rx_bi[i];
- rx_bi->dma = dma + (i * buf_size);
- rx_bi->hdr_buf = buffer + (i * buf_size);
- }
-}
-
-/**
* i40evf_setup_rx_descriptors - Allocate Rx descriptors
* @rx_ring: Rx descriptor ring (for a specific queue) to setup
*
@@ -629,9 +572,7 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring)
u64_stats_init(&rx_ring->syncp);
/* Round up to nearest 4K */
- rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
- ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
- : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
+ rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
rx_ring->size = ALIGN(rx_ring->size, 4096);
rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
&rx_ring->dma, GFP_KERNEL);
@@ -642,6 +583,7 @@ int i40evf_setup_rx_descriptors(struct i40e_ring *rx_ring)
goto err;
}
+ rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
@@ -660,6 +602,10 @@ err:
static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
{
rx_ring->next_to_use = val;
+
+ /* update next to alloc since we have filled the ring */
+ rx_ring->next_to_alloc = val;
+
/* Force memory writes to complete before letting h/w
* know there are new descriptors to fetch. (Only
* applicable for weak-ordered memory model archs,
@@ -670,160 +616,122 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
}
/**
- * i40evf_alloc_rx_buffers_ps - Replace used receive buffers; packet split
- * @rx_ring: ring to place buffers on
- * @cleaned_count: number of buffers to replace
+ * i40e_alloc_mapped_page - recycle or make a new page
+ * @rx_ring: ring to use
+ * @bi: rx_buffer struct to modify
*
- * Returns true if any errors on allocation
+ * Returns true if the page was successfully allocated or
+ * reused.
**/
-bool i40evf_alloc_rx_buffers_ps(struct i40e_ring *rx_ring, u16 cleaned_count)
+static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
+ struct i40e_rx_buffer *bi)
{
- u16 i = rx_ring->next_to_use;
- union i40e_rx_desc *rx_desc;
- struct i40e_rx_buffer *bi;
- const int current_node = numa_node_id();
+ struct page *page = bi->page;
+ dma_addr_t dma;
- /* do nothing if no valid netdev defined */
- if (!rx_ring->netdev || !cleaned_count)
- return false;
+ /* since we are recycling buffers we should seldom need to alloc */
+ if (likely(page)) {
+ rx_ring->rx_stats.page_reuse_count++;
+ return true;
+ }
- while (cleaned_count--) {
- rx_desc = I40E_RX_DESC(rx_ring, i);
- bi = &rx_ring->rx_bi[i];
+ /* alloc new page for storage */
+ page = dev_alloc_page();
+ if (unlikely(!page)) {
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
+ }
- if (bi->skb) /* desc is in use */
- goto no_buffers;
+ /* map page for use */
+ dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
- /* If we've been moved to a different NUMA node, release the
- * page so we can get a new one on the current node.
+ /* if mapping failed free memory back to system since
+ * there isn't much point in holding memory we can't use
*/
- if (bi->page && page_to_nid(bi->page) != current_node) {
- dma_unmap_page(rx_ring->dev,
- bi->page_dma,
- PAGE_SIZE,
- DMA_FROM_DEVICE);
- __free_page(bi->page);
- bi->page = NULL;
- bi->page_dma = 0;
- rx_ring->rx_stats.realloc_count++;
- } else if (bi->page) {
- rx_ring->rx_stats.page_reuse_count++;
- }
-
- if (!bi->page) {
- bi->page = alloc_page(GFP_ATOMIC);
- if (!bi->page) {
- rx_ring->rx_stats.alloc_page_failed++;
- goto no_buffers;
- }
- bi->page_dma = dma_map_page(rx_ring->dev,
- bi->page,
- 0,
- PAGE_SIZE,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(rx_ring->dev, bi->page_dma)) {
- rx_ring->rx_stats.alloc_page_failed++;
- __free_page(bi->page);
- bi->page = NULL;
- bi->page_dma = 0;
- bi->page_offset = 0;
- goto no_buffers;
- }
- bi->page_offset = 0;
- }
-
- /* Refresh the desc even if buffer_addrs didn't change
- * because each write-back erases this info.
- */
- rx_desc->read.pkt_addr =
- cpu_to_le64(bi->page_dma + bi->page_offset);
- rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
- i++;
- if (i == rx_ring->count)
- i = 0;
+ if (dma_mapping_error(rx_ring->dev, dma)) {
+ __free_pages(page, 0);
+ rx_ring->rx_stats.alloc_page_failed++;
+ return false;
}
- if (rx_ring->next_to_use != i)
- i40e_release_rx_desc(rx_ring, i);
+ bi->dma = dma;
+ bi->page = page;
+ bi->page_offset = 0;
- return false;
+ return true;
+}
-no_buffers:
- if (rx_ring->next_to_use != i)
- i40e_release_rx_desc(rx_ring, i);
+/**
+ * i40e_receive_skb - Send a completed packet up the stack
+ * @rx_ring: rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: vlan tag for packet
+ **/
+static void i40e_receive_skb(struct i40e_ring *rx_ring,
+ struct sk_buff *skb, u16 vlan_tag)
+{
+ struct i40e_q_vector *q_vector = rx_ring->q_vector;
- /* make sure to come back via polling to try again after
- * allocation failure
- */
- return true;
+ if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+ (vlan_tag & VLAN_VID_MASK))
+ __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+
+ napi_gro_receive(&q_vector->napi, skb);
}
/**
- * i40evf_alloc_rx_buffers_1buf - Replace used receive buffers; single buffer
+ * i40evf_alloc_rx_buffers - Replace used receive buffers
* @rx_ring: ring to place buffers on
* @cleaned_count: number of buffers to replace
*
- * Returns true if any errors on allocation
+ * Returns false if all allocations were successful, true if any fail
**/
-bool i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rx_ring, u16 cleaned_count)
+bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
{
- u16 i = rx_ring->next_to_use;
+ u16 ntu = rx_ring->next_to_use;
union i40e_rx_desc *rx_desc;
struct i40e_rx_buffer *bi;
- struct sk_buff *skb;
/* do nothing if no valid netdev defined */
if (!rx_ring->netdev || !cleaned_count)
return false;
- while (cleaned_count--) {
- rx_desc = I40E_RX_DESC(rx_ring, i);
- bi = &rx_ring->rx_bi[i];
- skb = bi->skb;
-
- if (!skb) {
- skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
- rx_ring->rx_buf_len,
- GFP_ATOMIC |
- __GFP_NOWARN);
- if (!skb) {
- rx_ring->rx_stats.alloc_buff_failed++;
- goto no_buffers;
- }
- /* initialize queue mapping */
- skb_record_rx_queue(skb, rx_ring->queue_index);
- bi->skb = skb;
- }
+ rx_desc = I40E_RX_DESC(rx_ring, ntu);
+ bi = &rx_ring->rx_bi[ntu];
- if (!bi->dma) {
- bi->dma = dma_map_single(rx_ring->dev,
- skb->data,
- rx_ring->rx_buf_len,
- DMA_FROM_DEVICE);
- if (dma_mapping_error(rx_ring->dev, bi->dma)) {
- rx_ring->rx_stats.alloc_buff_failed++;
- bi->dma = 0;
- dev_kfree_skb(bi->skb);
- bi->skb = NULL;
- goto no_buffers;
- }
- }
+ do {
+ if (!i40e_alloc_mapped_page(rx_ring, bi))
+ goto no_buffers;
- rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+ /* Refresh the desc even if buffer_addrs didn't change
+ * because each write-back erases this info.
+ */
+ rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
rx_desc->read.hdr_addr = 0;
- i++;
- if (i == rx_ring->count)
- i = 0;
- }
- if (rx_ring->next_to_use != i)
- i40e_release_rx_desc(rx_ring, i);
+ rx_desc++;
+ bi++;
+ ntu++;
+ if (unlikely(ntu == rx_ring->count)) {
+ rx_desc = I40E_RX_DESC(rx_ring, 0);
+ bi = rx_ring->rx_bi;
+ ntu = 0;
+ }
+
+ /* clear the status bits for the next_to_use descriptor */
+ rx_desc->wb.qword1.status_error_len = 0;
+
+ cleaned_count--;
+ } while (cleaned_count);
+
+ if (rx_ring->next_to_use != ntu)
+ i40e_release_rx_desc(rx_ring, ntu);
return false;
no_buffers:
- if (rx_ring->next_to_use != i)
- i40e_release_rx_desc(rx_ring, i);
+ if (rx_ring->next_to_use != ntu)
+ i40e_release_rx_desc(rx_ring, ntu);
/* make sure to come back via polling to try again after
* allocation failure
@@ -832,42 +740,35 @@ no_buffers:
}
/**
- * i40e_receive_skb - Send a completed packet up the stack
- * @rx_ring: rx ring in play
- * @skb: packet to send up
- * @vlan_tag: vlan tag for packet
- **/
-static void i40e_receive_skb(struct i40e_ring *rx_ring,
- struct sk_buff *skb, u16 vlan_tag)
-{
- struct i40e_q_vector *q_vector = rx_ring->q_vector;
-
- if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
- (vlan_tag & VLAN_VID_MASK))
- __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
-
- napi_gro_receive(&q_vector->napi, skb);
-}
-
-/**
* i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
* @vsi: the VSI we care about
* @skb: skb currently being received and modified
- * @rx_status: status value of last descriptor in packet
- * @rx_error: error value of last descriptor in packet
- * @rx_ptype: ptype value of last descriptor in packet
+ * @rx_desc: the receive descriptor
+ *
+ * skb->protocol must be set before this function is called
**/
static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
struct sk_buff *skb,
- u32 rx_status,
- u32 rx_error,
- u16 rx_ptype)
+ union i40e_rx_desc *rx_desc)
{
- struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(rx_ptype);
- bool ipv4, ipv6, ipv4_tunnel, ipv6_tunnel;
+ struct i40e_rx_ptype_decoded decoded;
+ bool ipv4, ipv6, tunnel = false;
+ u32 rx_error, rx_status;
+ u8 ptype;
+ u64 qword;
+
+ qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+ ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
+ rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
+ I40E_RXD_QW1_ERROR_SHIFT;
+ rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
+ I40E_RXD_QW1_STATUS_SHIFT;
+ decoded = decode_rx_desc_ptype(ptype);
skb->ip_summed = CHECKSUM_NONE;
+ skb_checksum_none_assert(skb);
+
/* Rx csum enabled and ip headers found? */
if (!(vsi->netdev->features & NETIF_F_RXCSUM))
return;
@@ -913,14 +814,13 @@ static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
* doesn't make it a hard requirement so if we have validated the
* inner checksum report CHECKSUM_UNNECESSARY.
*/
-
- ipv4_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT4_MAC_PAY3) &&
- (rx_ptype <= I40E_RX_PTYPE_GRENAT4_MACVLAN_IPV6_ICMP_PAY4);
- ipv6_tunnel = (rx_ptype >= I40E_RX_PTYPE_GRENAT6_MAC_PAY3) &&
- (rx_ptype <= I40E_RX_PTYPE_GRENAT6_MACVLAN_IPV6_ICMP_PAY4);
+ if (decoded.inner_prot & (I40E_RX_PTYPE_INNER_PROT_TCP |
+ I40E_RX_PTYPE_INNER_PROT_UDP |
+ I40E_RX_PTYPE_INNER_PROT_SCTP))
+ tunnel = true;
skb->ip_summed = CHECKSUM_UNNECESSARY;
- skb->csum_level = ipv4_tunnel || ipv6_tunnel;
+ skb->csum_level = tunnel ? 1 : 0;
return;
@@ -934,7 +834,7 @@ checksum_fail:
*
* Returns a hash type to be used by skb_set_hash
**/
-static inline enum pkt_hash_types i40e_ptype_to_htype(u8 ptype)
+static inline int i40e_ptype_to_htype(u8 ptype)
{
struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
@@ -962,7 +862,7 @@ static inline void i40e_rx_hash(struct i40e_ring *ring,
u8 rx_ptype)
{
u32 hash;
- const __le64 rss_mask =
+ const __le64 rss_mask =
cpu_to_le64((u64)I40E_RX_DESC_FLTSTAT_RSS_HASH <<
I40E_RX_DESC_STATUS_FLTSTAT_SHIFT);
@@ -976,315 +876,411 @@ static inline void i40e_rx_hash(struct i40e_ring *ring,
}
/**
- * i40e_clean_rx_irq_ps - Reclaim resources after receive; packet split
- * @rx_ring: rx ring to clean
- * @budget: how many cleans we're allowed
+ * i40evf_process_skb_fields - Populate skb header fields from Rx descriptor
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being populated
+ * @rx_ptype: the packet type decoded by hardware
*
- * Returns true if there's any budget left (e.g. the clean is finished)
+ * This function checks the ring, descriptor, and packet information in
+ * order to populate the hash, checksum, VLAN, protocol, and
+ * other fields within the skb.
**/
-static int i40e_clean_rx_irq_ps(struct i40e_ring *rx_ring, const int budget)
+static inline
+void i40evf_process_skb_fields(struct i40e_ring *rx_ring,
+ union i40e_rx_desc *rx_desc, struct sk_buff *skb,
+ u8 rx_ptype)
{
- unsigned int total_rx_bytes = 0, total_rx_packets = 0;
- u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
- u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
- struct i40e_vsi *vsi = rx_ring->vsi;
- u16 i = rx_ring->next_to_clean;
- union i40e_rx_desc *rx_desc;
- u32 rx_error, rx_status;
- bool failure = false;
- u8 rx_ptype;
- u64 qword;
- u32 copysize;
+ i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
- do {
- struct i40e_rx_buffer *rx_bi;
- struct sk_buff *skb;
- u16 vlan_tag;
- /* return some buffers to hardware, one at a time is too slow */
- if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
- failure = failure ||
- i40evf_alloc_rx_buffers_ps(rx_ring,
- cleaned_count);
- cleaned_count = 0;
- }
+ /* modifies the skb - consumes the enet header */
+ skb->protocol = eth_type_trans(skb, rx_ring->netdev);
- i = rx_ring->next_to_clean;
- rx_desc = I40E_RX_DESC(rx_ring, i);
- qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
- I40E_RXD_QW1_STATUS_SHIFT;
+ i40e_rx_checksum(rx_ring->vsi, skb, rx_desc);
- if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
- break;
+ skb_record_rx_queue(skb, rx_ring->queue_index);
+}
- /* This memory barrier is needed to keep us from reading
- * any other fields out of the rx_desc until we know the
- * DD bit is set.
- */
- dma_rmb();
- /* sync header buffer for reading */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_ring->rx_bi[0].dma,
- i * rx_ring->rx_hdr_len,
- rx_ring->rx_hdr_len,
- DMA_FROM_DEVICE);
- rx_bi = &rx_ring->rx_bi[i];
- skb = rx_bi->skb;
- if (likely(!skb)) {
- skb = __netdev_alloc_skb_ip_align(rx_ring->netdev,
- rx_ring->rx_hdr_len,
- GFP_ATOMIC |
- __GFP_NOWARN);
- if (!skb) {
- rx_ring->rx_stats.alloc_buff_failed++;
- failure = true;
- break;
- }
+/**
+ * i40e_pull_tail - i40e specific version of skb_pull_tail
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being adjusted
+ *
+ * This function is an i40e specific version of __pskb_pull_tail. The
+ * main difference between this version and the original function is that
+ * this function can make several assumptions about the state of things
+ * that allow for significant optimizations versus the standard function.
+ * As a result we can do things like drop a frag and maintain an accurate
+ * truesize for the skb.
+ */
+static void i40e_pull_tail(struct i40e_ring *rx_ring, struct sk_buff *skb)
+{
+ struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+ unsigned char *va;
+ unsigned int pull_len;
- /* initialize queue mapping */
- skb_record_rx_queue(skb, rx_ring->queue_index);
- /* we are reusing so sync this buffer for CPU use */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_ring->rx_bi[0].dma,
- i * rx_ring->rx_hdr_len,
- rx_ring->rx_hdr_len,
- DMA_FROM_DEVICE);
- }
- rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
- I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
- rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK) >>
- I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
- rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK) >>
- I40E_RXD_QW1_LENGTH_SPH_SHIFT;
-
- rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
- I40E_RXD_QW1_ERROR_SHIFT;
- rx_hbo = rx_error & BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
- rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
+ /* it is valid to use page_address instead of kmap since we are
+ * working with pages allocated out of the lomem pool per
+ * alloc_page(GFP_ATOMIC)
+ */
+ va = skb_frag_address(frag);
- rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
- I40E_RXD_QW1_PTYPE_SHIFT;
- /* sync half-page for reading */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_bi->page_dma,
- rx_bi->page_offset,
- PAGE_SIZE / 2,
- DMA_FROM_DEVICE);
- prefetch(page_address(rx_bi->page) + rx_bi->page_offset);
- rx_bi->skb = NULL;
- cleaned_count++;
- copysize = 0;
- if (rx_hbo || rx_sph) {
- int len;
-
- if (rx_hbo)
- len = I40E_RX_HDR_SIZE;
- else
- len = rx_header_len;
- memcpy(__skb_put(skb, len), rx_bi->hdr_buf, len);
- } else if (skb->len == 0) {
- int len;
- unsigned char *va = page_address(rx_bi->page) +
- rx_bi->page_offset;
-
- len = min(rx_packet_len, rx_ring->rx_hdr_len);
- memcpy(__skb_put(skb, len), va, len);
- copysize = len;
- rx_packet_len -= len;
- }
- /* Get the rest of the data if this was a header split */
- if (rx_packet_len) {
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
- rx_bi->page,
- rx_bi->page_offset + copysize,
- rx_packet_len, I40E_RXBUFFER_2048);
-
- /* If the page count is more than 2, then both halves
- * of the page are used and we need to free it. Do it
- * here instead of in the alloc code. Otherwise one
- * of the half-pages might be released between now and
- * then, and we wouldn't know which one to use.
- * Don't call get_page and free_page since those are
- * both expensive atomic operations that just change
- * the refcount in opposite directions. Just give the
- * page to the stack; he can have our refcount.
- */
- if (page_count(rx_bi->page) > 2) {
- dma_unmap_page(rx_ring->dev,
- rx_bi->page_dma,
- PAGE_SIZE,
- DMA_FROM_DEVICE);
- rx_bi->page = NULL;
- rx_bi->page_dma = 0;
- rx_ring->rx_stats.realloc_count++;
- } else {
- get_page(rx_bi->page);
- /* switch to the other half-page here; the
- * allocation code programs the right addr
- * into HW. If we haven't used this half-page,
- * the address won't be changed, and HW can
- * just use it next time through.
- */
- rx_bi->page_offset ^= PAGE_SIZE / 2;
- }
+ /* we need the header to contain the greater of either ETH_HLEN or
+ * 60 bytes if the skb->len is less than 60 for skb_pad.
+ */
+ pull_len = eth_get_headlen(va, I40E_RX_HDR_SIZE);
- }
- I40E_RX_INCREMENT(rx_ring, i);
+ /* align pull length to size of long to optimize memcpy performance */
+ skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
- if (unlikely(
- !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
- struct i40e_rx_buffer *next_buffer;
+ /* update all of the pointers */
+ skb_frag_size_sub(frag, pull_len);
+ frag->page_offset += pull_len;
+ skb->data_len -= pull_len;
+ skb->tail += pull_len;
+}
- next_buffer = &rx_ring->rx_bi[i];
- next_buffer->skb = skb;
- rx_ring->rx_stats.non_eop_descs++;
- continue;
- }
+/**
+ * i40e_cleanup_headers - Correct empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @skb: pointer to current skb being fixed
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb)
+{
+ /* place header in linear portion of buffer */
+ if (skb_is_nonlinear(skb))
+ i40e_pull_tail(rx_ring, skb);
- /* ERR_MASK will only have valid bits if EOP set */
- if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
- dev_kfree_skb_any(skb);
- continue;
- }
+ /* if eth_skb_pad returns an error the skb was freed */
+ if (eth_skb_pad(skb))
+ return true;
- i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
+ return false;
+}
- /* probably a little skewed due to removing CRC */
- total_rx_bytes += skb->len;
- total_rx_packets++;
+/**
+ * i40e_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
+ struct i40e_rx_buffer *old_buff)
+{
+ struct i40e_rx_buffer *new_buff;
+ u16 nta = rx_ring->next_to_alloc;
- skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+ new_buff = &rx_ring->rx_bi[nta];
- i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
- vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
- ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
- : 0;
-#ifdef I40E_FCOE
- if (unlikely(
- i40e_rx_is_fcoe(rx_ptype) &&
- !i40e_fcoe_handle_offload(rx_ring, rx_desc, skb))) {
- dev_kfree_skb_any(skb);
- continue;
- }
+ /* transfer page from old buffer to new buffer */
+ *new_buff = *old_buff;
+}
+
+/**
+ * i40e_page_is_reserved - check if reuse is possible
+ * @page: page struct to check
+ */
+static inline bool i40e_page_is_reserved(struct page *page)
+{
+ return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
+}
+
+/**
+ * i40e_add_rx_frag - Add contents of Rx buffer to sk_buff
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_buffer: buffer containing page to add
+ * @rx_desc: descriptor containing length of buffer written by hardware
+ * @skb: sk_buff to place the data into
+ *
+ * This function will add the data contained in rx_buffer->page to the skb.
+ * This is done either through a direct copy if the data in the buffer is
+ * less than the skb header size, otherwise it will just attach the page as
+ * a frag to the skb.
+ *
+ * The function will then update the page offset if necessary and return
+ * true if the buffer can be reused by the adapter.
+ **/
+static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
+ struct i40e_rx_buffer *rx_buffer,
+ union i40e_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ struct page *page = rx_buffer->page;
+ u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+ unsigned int size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
+ I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
+#if (PAGE_SIZE < 8192)
+ unsigned int truesize = I40E_RXBUFFER_2048;
+#else
+ unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+ unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
#endif
- i40e_receive_skb(rx_ring, skb, vlan_tag);
- rx_desc->wb.qword1.status_error_len = 0;
+ /* will the data fit in the skb we allocated? if so, just
+ * copy it as it is pretty small anyway
+ */
+ if ((size <= I40E_RX_HDR_SIZE) && !skb_is_nonlinear(skb)) {
+ unsigned char *va = page_address(page) + rx_buffer->page_offset;
- } while (likely(total_rx_packets < budget));
+ memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
- u64_stats_update_begin(&rx_ring->syncp);
- rx_ring->stats.packets += total_rx_packets;
- rx_ring->stats.bytes += total_rx_bytes;
- u64_stats_update_end(&rx_ring->syncp);
- rx_ring->q_vector->rx.total_packets += total_rx_packets;
- rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+ /* page is not reserved, we can reuse buffer as-is */
+ if (likely(!i40e_page_is_reserved(page)))
+ return true;
- return failure ? budget : total_rx_packets;
+ /* this page cannot be reused so discard it */
+ __free_pages(page, 0);
+ return false;
+ }
+
+ skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
+ rx_buffer->page_offset, size, truesize);
+
+ /* avoid re-using remote pages */
+ if (unlikely(i40e_page_is_reserved(page)))
+ return false;
+
+#if (PAGE_SIZE < 8192)
+ /* if we are only owner of page we can reuse it */
+ if (unlikely(page_count(page) != 1))
+ return false;
+
+ /* flip page offset to other buffer */
+ rx_buffer->page_offset ^= truesize;
+#else
+ /* move offset up to the next cache line */
+ rx_buffer->page_offset += truesize;
+
+ if (rx_buffer->page_offset > last_offset)
+ return false;
+#endif
+
+ /* Even if we own the page, we are not allowed to use atomic_set()
+ * This would break get_page_unless_zero() users.
+ */
+ get_page(rx_buffer->page);
+
+ return true;
+}
+
+/**
+ * i40evf_fetch_rx_buffer - Allocate skb and populate it
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @rx_desc: descriptor containing info written by hardware
+ *
+ * This function allocates an skb on the fly, and populates it with the page
+ * data from the current receive descriptor, taking care to set up the skb
+ * correctly, as well as handling calling the page recycle function if
+ * necessary.
+ */
+static inline
+struct sk_buff *i40evf_fetch_rx_buffer(struct i40e_ring *rx_ring,
+ union i40e_rx_desc *rx_desc)
+{
+ struct i40e_rx_buffer *rx_buffer;
+ struct sk_buff *skb;
+ struct page *page;
+
+ rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
+ page = rx_buffer->page;
+ prefetchw(page);
+
+ skb = rx_buffer->skb;
+
+ if (likely(!skb)) {
+ void *page_addr = page_address(page) + rx_buffer->page_offset;
+
+ /* prefetch first cache line of first page */
+ prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+ prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+ /* allocate a skb to store the frags */
+ skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
+ I40E_RX_HDR_SIZE,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(!skb)) {
+ rx_ring->rx_stats.alloc_buff_failed++;
+ return NULL;
+ }
+
+ /* we will be copying header into skb->data in
+ * pskb_may_pull so it is in our interest to prefetch
+ * it now to avoid a possible cache miss
+ */
+ prefetchw(skb->data);
+ } else {
+ rx_buffer->skb = NULL;
+ }
+
+ /* we are reusing so sync this buffer for CPU use */
+ dma_sync_single_range_for_cpu(rx_ring->dev,
+ rx_buffer->dma,
+ rx_buffer->page_offset,
+ I40E_RXBUFFER_2048,
+ DMA_FROM_DEVICE);
+
+ /* pull page into skb */
+ if (i40e_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) {
+ /* hand second half of page back to the ring */
+ i40e_reuse_rx_page(rx_ring, rx_buffer);
+ rx_ring->rx_stats.page_reuse_count++;
+ } else {
+ /* we are not reusing the buffer so unmap it */
+ dma_unmap_page(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ }
+
+ /* clear contents of buffer_info */
+ rx_buffer->page = NULL;
+
+ return skb;
+}
+
+/**
+ * i40e_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ *
+ * This function updates next to clean. If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool i40e_is_non_eop(struct i40e_ring *rx_ring,
+ union i40e_rx_desc *rx_desc,
+ struct sk_buff *skb)
+{
+ u32 ntc = rx_ring->next_to_clean + 1;
+
+ /* fetch, update, and store next to clean */
+ ntc = (ntc < rx_ring->count) ? ntc : 0;
+ rx_ring->next_to_clean = ntc;
+
+ prefetch(I40E_RX_DESC(rx_ring, ntc));
+
+ /* if we are the last buffer then there is nothing else to do */
+#define I40E_RXD_EOF BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)
+ if (likely(i40e_test_staterr(rx_desc, I40E_RXD_EOF)))
+ return false;
+
+ /* place skb in next buffer to be received */
+ rx_ring->rx_bi[ntc].skb = skb;
+ rx_ring->rx_stats.non_eop_descs++;
+
+ return true;
}
/**
- * i40e_clean_rx_irq_1buf - Reclaim resources after receive; single buffer
- * @rx_ring: rx ring to clean
- * @budget: how many cleans we're allowed
+ * i40e_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
*
- * Returns number of packets cleaned
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing. The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed
**/
-static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
+static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
{
unsigned int total_rx_bytes = 0, total_rx_packets = 0;
u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
- struct i40e_vsi *vsi = rx_ring->vsi;
- union i40e_rx_desc *rx_desc;
- u32 rx_error, rx_status;
- u16 rx_packet_len;
bool failure = false;
- u8 rx_ptype;
- u64 qword;
- u16 i;
- do {
- struct i40e_rx_buffer *rx_bi;
+ while (likely(total_rx_packets < budget)) {
+ union i40e_rx_desc *rx_desc;
struct sk_buff *skb;
+ u32 rx_status;
u16 vlan_tag;
+ u8 rx_ptype;
+ u64 qword;
+
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
failure = failure ||
- i40evf_alloc_rx_buffers_1buf(rx_ring,
- cleaned_count);
+ i40evf_alloc_rx_buffers(rx_ring, cleaned_count);
cleaned_count = 0;
}
- i = rx_ring->next_to_clean;
- rx_desc = I40E_RX_DESC(rx_ring, i);
+ rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
+
qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+ rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
+ I40E_RXD_QW1_PTYPE_SHIFT;
rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
- I40E_RXD_QW1_STATUS_SHIFT;
+ I40E_RXD_QW1_STATUS_SHIFT;
if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
break;
+ /* status_error_len will always be zero for unused descriptors
+ * because it's cleared in cleanup, and overlaps with hdr_addr
+ * which is always zero because packet split isn't used, if the
+ * hardware wrote DD then it will be non-zero
+ */
+ if (!rx_desc->wb.qword1.status_error_len)
+ break;
+
/* This memory barrier is needed to keep us from reading
* any other fields out of the rx_desc until we know the
* DD bit is set.
*/
dma_rmb();
- rx_bi = &rx_ring->rx_bi[i];
- skb = rx_bi->skb;
- prefetch(skb->data);
-
- rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
- I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
-
- rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
- I40E_RXD_QW1_ERROR_SHIFT;
- rx_error &= ~BIT(I40E_RX_DESC_ERROR_HBO_SHIFT);
+ skb = i40evf_fetch_rx_buffer(rx_ring, rx_desc);
+ if (!skb)
+ break;
- rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
- I40E_RXD_QW1_PTYPE_SHIFT;
- rx_bi->skb = NULL;
cleaned_count++;
- /* Get the header and possibly the whole packet
- * If this is an skb from previous receive dma will be 0
- */
- skb_put(skb, rx_packet_len);
- dma_unmap_single(rx_ring->dev, rx_bi->dma, rx_ring->rx_buf_len,
- DMA_FROM_DEVICE);
- rx_bi->dma = 0;
-
- I40E_RX_INCREMENT(rx_ring, i);
-
- if (unlikely(
- !(rx_status & BIT(I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
- rx_ring->rx_stats.non_eop_descs++;
+ if (i40e_is_non_eop(rx_ring, rx_desc, skb))
continue;
- }
- /* ERR_MASK will only have valid bits if EOP set */
- if (unlikely(rx_error & BIT(I40E_RX_DESC_ERROR_RXE_SHIFT))) {
+ /* ERR_MASK will only have valid bits if EOP set, and
+ * what we are doing here is actually checking
+ * I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
+ * the error field
+ */
+ if (unlikely(i40e_test_staterr(rx_desc, BIT(I40E_RXD_QW1_ERROR_SHIFT)))) {
dev_kfree_skb_any(skb);
continue;
}
- i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
+ if (i40e_cleanup_headers(rx_ring, skb))
+ continue;
+
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
- total_rx_packets++;
- skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+ /* populate checksum, VLAN, and protocol */
+ i40evf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
- i40e_rx_checksum(vsi, skb, rx_status, rx_error, rx_ptype);
- vlan_tag = rx_status & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
- ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
- : 0;
+ vlan_tag = (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) ?
+ le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1) : 0;
+
i40e_receive_skb(rx_ring, skb, vlan_tag);
- rx_desc->wb.qword1.status_error_len = 0;
- } while (likely(total_rx_packets < budget));
+ /* update budget accounting */
+ total_rx_packets++;
+ }
u64_stats_update_begin(&rx_ring->syncp);
rx_ring->stats.packets += total_rx_packets;
@@ -1293,6 +1289,7 @@ static int i40e_clean_rx_irq_1buf(struct i40e_ring *rx_ring, int budget)
rx_ring->q_vector->rx.total_packets += total_rx_packets;
rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+ /* guarantee a trip back through this routine if there was a failure */
return failure ? budget : total_rx_packets;
}
@@ -1434,12 +1431,7 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget)
budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
i40e_for_each_ring(ring, q_vector->rx) {
- int cleaned;
-
- if (ring_is_ps_enabled(ring))
- cleaned = i40e_clean_rx_irq_ps(ring, budget_per_ring);
- else
- cleaned = i40e_clean_rx_irq_1buf(ring, budget_per_ring);
+ int cleaned = i40e_clean_rx_irq(ring, budget_per_ring);
work_done += cleaned;
/* if we clean as many as budgeted, we must not be done */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 54b52e8f7097..0112277e5882 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -102,8 +102,8 @@ enum i40e_dyn_idx_t {
(((pf)->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) ? \
I40E_DEFAULT_RSS_HENA_EXPANDED : I40E_DEFAULT_RSS_HENA)
-/* Supported Rx Buffer Sizes */
-#define I40E_RXBUFFER_512 512 /* Used for packet split */
+/* Supported Rx Buffer Sizes (a multiple of 128) */
+#define I40E_RXBUFFER_256 256
#define I40E_RXBUFFER_2048 2048
#define I40E_RXBUFFER_3072 3072 /* For FCoE MTU of 2158 */
#define I40E_RXBUFFER_4096 4096
@@ -114,9 +114,28 @@ enum i40e_dyn_idx_t {
* reserve 2 more, and skb_shared_info adds an additional 384 bytes more,
* this adds up to 512 bytes of extra data meaning the smallest allocation
* we could have is 1K.
- * i.e. RXBUFFER_512 --> size-1024 slab
+ * i.e. RXBUFFER_256 --> 960 byte skb (size-1024 slab)
+ * i.e. RXBUFFER_512 --> 1216 byte skb (size-2048 slab)
*/
-#define I40E_RX_HDR_SIZE I40E_RXBUFFER_512
+#define I40E_RX_HDR_SIZE I40E_RXBUFFER_256
+#define i40e_rx_desc i40e_32byte_rx_desc
+
+/**
+ * i40e_test_staterr - tests bits in Rx descriptor status and error fields
+ * @rx_desc: pointer to receive descriptor (in le64 format)
+ * @stat_err_bits: value to mask
+ *
+ * This function does some fast chicanery in order to return the
+ * value of the mask which is really only used for boolean tests.
+ * The status_error_len doesn't need to be shifted because it begins
+ * at offset zero.
+ */
+static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc,
+ const u64 stat_err_bits)
+{
+ return !!(rx_desc->wb.qword1.status_error_len &
+ cpu_to_le64(stat_err_bits));
+}
/* How many Rx Buffers do we bundle into one write to the hardware ? */
#define I40E_RX_BUFFER_WRITE 16 /* Must be power of 2 */
@@ -142,8 +161,6 @@ enum i40e_dyn_idx_t {
prefetch((n)); \
} while (0)
-#define i40e_rx_desc i40e_32byte_rx_desc
-
#define I40E_MAX_BUFFER_TXD 8
#define I40E_MIN_TX_LEN 17
@@ -212,10 +229,8 @@ struct i40e_tx_buffer {
struct i40e_rx_buffer {
struct sk_buff *skb;
- void *hdr_buf;
dma_addr_t dma;
struct page *page;
- dma_addr_t page_dma;
unsigned int page_offset;
};
@@ -244,22 +259,18 @@ struct i40e_rx_queue_stats {
enum i40e_ring_state_t {
__I40E_TX_FDIR_INIT_DONE,
__I40E_TX_XPS_INIT_DONE,
- __I40E_RX_PS_ENABLED,
- __I40E_RX_16BYTE_DESC_ENABLED,
};
-#define ring_is_ps_enabled(ring) \
- test_bit(__I40E_RX_PS_ENABLED, &(ring)->state)
-#define set_ring_ps_enabled(ring) \
- set_bit(__I40E_RX_PS_ENABLED, &(ring)->state)
-#define clear_ring_ps_enabled(ring) \
- clear_bit(__I40E_RX_PS_ENABLED, &(ring)->state)
-#define ring_is_16byte_desc_enabled(ring) \
- test_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state)
-#define set_ring_16byte_desc_enabled(ring) \
- set_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state)
-#define clear_ring_16byte_desc_enabled(ring) \
- clear_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state)
+/* some useful defines for virtchannel interface, which
+ * is the only remaining user of header split
+ */
+#define I40E_RX_DTYPE_NO_SPLIT 0
+#define I40E_RX_DTYPE_HEADER_SPLIT 1
+#define I40E_RX_DTYPE_SPLIT_ALWAYS 2
+#define I40E_RX_SPLIT_L2 0x1
+#define I40E_RX_SPLIT_IP 0x2
+#define I40E_RX_SPLIT_TCP_UDP 0x4
+#define I40E_RX_SPLIT_SCTP 0x8
/* struct that defines a descriptor ring, associated with a VSI */
struct i40e_ring {
@@ -278,16 +289,7 @@ struct i40e_ring {
u16 count; /* Number of descriptors */
u16 reg_idx; /* HW register index of the ring */
- u16 rx_hdr_len;
u16 rx_buf_len;
- u8 dtype;
-#define I40E_RX_DTYPE_NO_SPLIT 0
-#define I40E_RX_DTYPE_HEADER_SPLIT 1
-#define I40E_RX_DTYPE_SPLIT_ALWAYS 2
-#define I40E_RX_SPLIT_L2 0x1
-#define I40E_RX_SPLIT_IP 0x2
-#define I40E_RX_SPLIT_TCP_UDP 0x4
-#define I40E_RX_SPLIT_SCTP 0x8
/* used in interrupt processing */
u16 next_to_use;
@@ -319,6 +321,7 @@ struct i40e_ring {
struct i40e_q_vector *q_vector; /* Backreference to associated vector */
struct rcu_head rcu; /* to avoid race on free */
+ u16 next_to_alloc;
} ____cacheline_internodealigned_in_smp;
enum i40e_latency_range {
@@ -342,9 +345,7 @@ struct i40e_ring_container {
#define i40e_for_each_ring(pos, head) \
for (pos = (head).ring; pos != NULL; pos = pos->next)
-bool i40evf_alloc_rx_buffers_ps(struct i40e_ring *rxr, u16 cleaned_count);
-bool i40evf_alloc_rx_buffers_1buf(struct i40e_ring *rxr, u16 cleaned_count);
-void i40evf_alloc_rx_headers(struct i40e_ring *rxr);
+bool i40evf_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
netdev_tx_t i40evf_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
void i40evf_clean_tx_ring(struct i40e_ring *tx_ring);
void i40evf_clean_rx_ring(struct i40e_ring *rx_ring);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index 25afabf999d0..fa044a904208 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -80,9 +80,6 @@ struct i40e_vsi {
#define I40EVF_REQ_DESCRIPTOR_MULTIPLE 32
/* Supported Rx Buffer Sizes */
-#define I40EVF_RXBUFFER_64 64 /* Used for packet split */
-#define I40EVF_RXBUFFER_128 128 /* Used for packet split */
-#define I40EVF_RXBUFFER_256 256 /* Used for packet split */
#define I40EVF_RXBUFFER_2048 2048
#define I40EVF_MAX_RXBUFFER 16384 /* largest size for single descriptor */
#define I40EVF_MAX_AQ_BUF_SIZE 4096
@@ -208,9 +205,6 @@ struct i40evf_adapter {
u32 flags;
#define I40EVF_FLAG_RX_CSUM_ENABLED BIT(0)
-#define I40EVF_FLAG_RX_1BUF_CAPABLE BIT(1)
-#define I40EVF_FLAG_RX_PS_CAPABLE BIT(2)
-#define I40EVF_FLAG_RX_PS_ENABLED BIT(3)
#define I40EVF_FLAG_IMIR_ENABLED BIT(5)
#define I40EVF_FLAG_MQ_CAPABLE BIT(6)
#define I40EVF_FLAG_NEED_LINK_UPDATE BIT(7)
@@ -295,7 +289,6 @@ struct i40evf_adapter {
/* Ethtool Private Flags */
-#define I40EVF_PRIV_FLAGS_PS BIT(0)
/* needed by i40evf_ethtool.c */
extern char i40evf_driver_name[];
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index 5a48ee07688f..c9c202f6c521 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -63,12 +63,6 @@ static const struct i40evf_stats i40evf_gstrings_stats[] = {
#define I40EVF_STATS_LEN(_dev) \
(I40EVF_GLOBAL_STATS_LEN + I40EVF_QUEUE_STATS_LEN(_dev))
-static const char i40evf_priv_flags_strings[][ETH_GSTRING_LEN] = {
- "packet-split",
-};
-
-#define I40EVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40evf_priv_flags_strings)
-
/**
* i40evf_get_settings - Get Link Speed and Duplex settings
* @netdev: network interface device structure
@@ -103,8 +97,6 @@ static int i40evf_get_sset_count(struct net_device *netdev, int sset)
{
if (sset == ETH_SS_STATS)
return I40EVF_STATS_LEN(netdev);
- else if (sset == ETH_SS_PRIV_FLAGS)
- return I40EVF_PRIV_FLAGS_STR_LEN;
else
return -EINVAL;
}
@@ -170,12 +162,6 @@ static void i40evf_get_strings(struct net_device *netdev, u32 sset, u8 *data)
snprintf(p, ETH_GSTRING_LEN, "rx-%u.bytes", i);
p += ETH_GSTRING_LEN;
}
- } else if (sset == ETH_SS_PRIV_FLAGS) {
- for (i = 0; i < I40EVF_PRIV_FLAGS_STR_LEN; i++) {
- memcpy(data, i40evf_priv_flags_strings[i],
- ETH_GSTRING_LEN);
- data += ETH_GSTRING_LEN;
- }
}
}
@@ -225,7 +211,6 @@ static void i40evf_get_drvinfo(struct net_device *netdev,
strlcpy(drvinfo->version, i40evf_driver_version, 32);
strlcpy(drvinfo->fw_version, "N/A", 4);
strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), 32);
- drvinfo->n_priv_flags = I40EVF_PRIV_FLAGS_STR_LEN;
}
/**
@@ -515,54 +500,6 @@ static int i40evf_set_rxfh(struct net_device *netdev, const u32 *indir,
return i40evf_config_rss(adapter);
}
-/**
- * i40evf_get_priv_flags - report device private flags
- * @dev: network interface device structure
- *
- * The get string set count and the string set should be matched for each
- * flag returned. Add new strings for each flag to the i40e_priv_flags_strings
- * array.
- *
- * Returns a u32 bitmap of flags.
- **/
-static u32 i40evf_get_priv_flags(struct net_device *dev)
-{
- struct i40evf_adapter *adapter = netdev_priv(dev);
- u32 ret_flags = 0;
-
- ret_flags |= adapter->flags & I40EVF_FLAG_RX_PS_ENABLED ?
- I40EVF_PRIV_FLAGS_PS : 0;
-
- return ret_flags;
-}
-
-/**
- * i40evf_set_priv_flags - set private flags
- * @dev: network interface device structure
- * @flags: bit flags to be set
- **/
-static int i40evf_set_priv_flags(struct net_device *dev, u32 flags)
-{
- struct i40evf_adapter *adapter = netdev_priv(dev);
- bool reset_required = false;
-
- if ((flags & I40EVF_PRIV_FLAGS_PS) &&
- !(adapter->flags & I40EVF_FLAG_RX_PS_ENABLED)) {
- adapter->flags |= I40EVF_FLAG_RX_PS_ENABLED;
- reset_required = true;
- } else if (!(flags & I40EVF_PRIV_FLAGS_PS) &&
- (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED)) {
- adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED;
- reset_required = true;
- }
-
- /* if needed, issue reset to cause things to take effect */
- if (reset_required)
- i40evf_schedule_reset(adapter);
-
- return 0;
-}
-
static const struct ethtool_ops i40evf_ethtool_ops = {
.get_settings = i40evf_get_settings,
.get_drvinfo = i40evf_get_drvinfo,
@@ -572,8 +509,6 @@ static const struct ethtool_ops i40evf_ethtool_ops = {
.get_strings = i40evf_get_strings,
.get_ethtool_stats = i40evf_get_ethtool_stats,
.get_sset_count = i40evf_get_sset_count,
- .get_priv_flags = i40evf_get_priv_flags,
- .set_priv_flags = i40evf_set_priv_flags,
.get_msglevel = i40evf_get_msglevel,
.set_msglevel = i40evf_set_msglevel,
.get_coalesce = i40evf_get_coalesce,
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 9f0bd7acc22a..b548dbe78cd3 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -641,28 +641,11 @@ static void i40evf_configure_tx(struct i40evf_adapter *adapter)
static void i40evf_configure_rx(struct i40evf_adapter *adapter)
{
struct i40e_hw *hw = &adapter->hw;
- struct net_device *netdev = adapter->netdev;
- int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
int i;
- int rx_buf_len;
-
-
- /* Set the RX buffer length according to the mode */
- if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED ||
- netdev->mtu <= ETH_DATA_LEN)
- rx_buf_len = I40EVF_RXBUFFER_2048;
- else
- rx_buf_len = ALIGN(max_frame, 1024);
for (i = 0; i < adapter->num_active_queues; i++) {
adapter->rx_rings[i].tail = hw->hw_addr + I40E_QRX_TAIL1(i);
- adapter->rx_rings[i].rx_buf_len = rx_buf_len;
- if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) {
- set_ring_ps_enabled(&adapter->rx_rings[i]);
- adapter->rx_rings[i].rx_hdr_len = I40E_RX_HDR_SIZE;
- } else {
- clear_ring_ps_enabled(&adapter->rx_rings[i]);
- }
+ adapter->rx_rings[i].rx_buf_len = I40EVF_RXBUFFER_2048;
}
}
@@ -1007,14 +990,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter)
for (i = 0; i < adapter->num_active_queues; i++) {
struct i40e_ring *ring = &adapter->rx_rings[i];
- if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) {
- i40evf_alloc_rx_headers(ring);
- i40evf_alloc_rx_buffers_ps(ring, ring->count);
- } else {
- i40evf_alloc_rx_buffers_1buf(ring, ring->count);
- }
- ring->next_to_use = ring->count - 1;
- writel(ring->next_to_use, ring->tail);
+ i40evf_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
}
}
@@ -2423,11 +2399,6 @@ static void i40evf_init_task(struct work_struct *work)
adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN;
adapter->flags |= I40EVF_FLAG_RX_CSUM_ENABLED;
- adapter->flags |= I40EVF_FLAG_RX_1BUF_CAPABLE;
- adapter->flags |= I40EVF_FLAG_RX_PS_CAPABLE;
-
- /* Default to single buffer rx, can be changed through ethtool. */
- adapter->flags &= ~I40EVF_FLAG_RX_PS_ENABLED;
netdev->netdev_ops = &i40evf_netdev_ops;
i40evf_set_ethtool_ops(netdev);
@@ -2795,7 +2766,6 @@ static void i40evf_remove(struct pci_dev *pdev)
iounmap(hw->hw_addr);
pci_release_regions(pdev);
-
i40evf_free_all_tx_resources(adapter);
i40evf_free_all_rx_resources(adapter);
i40evf_free_queues(adapter);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index ba7fbc0608a6..c5d33a2cea87 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -270,10 +270,6 @@ void i40evf_configure_queues(struct i40evf_adapter *adapter)
vqpi->rxq.max_pkt_size = adapter->netdev->mtu
+ ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN;
vqpi->rxq.databuffer_size = adapter->rx_rings[i].rx_buf_len;
- if (adapter->flags & I40EVF_FLAG_RX_PS_ENABLED) {
- vqpi->rxq.splithdr_enabled = true;
- vqpi->rxq.hdr_size = I40E_RX_HDR_SIZE;
- }
vqpi++;
}