summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/net/ethernet/sfc/efx.c 34
-rw-r--r-- drivers/net/ethernet/sfc/efx.h 13
-rw-r--r-- drivers/net/ethernet/sfc/ethtool.c 4
-rw-r--r-- drivers/net/ethernet/sfc/falcon.c 17
-rw-r--r-- drivers/net/ethernet/sfc/filter.c 74
-rw-r--r-- drivers/net/ethernet/sfc/net_driver.h 35
-rw-r--r-- drivers/net/ethernet/sfc/nic.c 90
-rw-r--r-- drivers/net/ethernet/sfc/rx.c 211
-rw-r--r-- drivers/net/ethernet/sfc/siena.c 3
9 files changed, 363 insertions, 118 deletions
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index f8013c3ea37c..1213af5024d1 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -88,8 +88,6 @@ const char *const efx_reset_type_names[] = {
[RESET_TYPE_MC_FAILURE] = "MC_FAILURE",
};
-#define EFX_MAX_MTU (9 * 1024)
-
/* Reset workqueue. If any NIC has a hardware failure then a reset will be
* queued onto this work queue. This is not a per-nic work queue, because
* efx_reset_work() acquires the rtnl lock, so resets are naturally serialised.
@@ -627,9 +625,11 @@ fail:
*/
static void efx_start_datapath(struct efx_nic *efx)
{
+ bool old_rx_scatter = efx->rx_scatter;
struct efx_tx_queue *tx_queue;
struct efx_rx_queue *rx_queue;
struct efx_channel *channel;
+ size_t rx_buf_len;
/* Calculate the rx buffer allocation parameters required to
* support the current MTU, including padding for header
@@ -638,8 +638,32 @@ static void efx_start_datapath(struct efx_nic *efx)
efx->rx_dma_len = (efx->type->rx_buffer_hash_size +
EFX_MAX_FRAME_LEN(efx->net_dev->mtu) +
efx->type->rx_buffer_padding);
- efx->rx_buffer_order = get_order(sizeof(struct efx_rx_page_state) +
- EFX_PAGE_IP_ALIGN + efx->rx_dma_len);
+ rx_buf_len = (sizeof(struct efx_rx_page_state) +
+ EFX_PAGE_IP_ALIGN + efx->rx_dma_len);
+ if (rx_buf_len <= PAGE_SIZE) {
+ efx->rx_scatter = false;
+ efx->rx_buffer_order = 0;
+ if (rx_buf_len <= PAGE_SIZE / 2)
+ efx->rx_buffer_truesize = PAGE_SIZE / 2;
+ else
+ efx->rx_buffer_truesize = PAGE_SIZE;
+ } else if (efx->type->can_rx_scatter) {
+ BUILD_BUG_ON(sizeof(struct efx_rx_page_state) +
+ EFX_PAGE_IP_ALIGN + EFX_RX_USR_BUF_SIZE >
+ PAGE_SIZE / 2);
+ efx->rx_scatter = true;
+ efx->rx_dma_len = EFX_RX_USR_BUF_SIZE;
+ efx->rx_buffer_order = 0;
+ efx->rx_buffer_truesize = PAGE_SIZE / 2;
+ } else {
+ efx->rx_scatter = false;
+ efx->rx_buffer_order = get_order(rx_buf_len);
+ efx->rx_buffer_truesize = PAGE_SIZE << efx->rx_buffer_order;
+ }
+
+ /* RX filters also have scatter-enabled flags */
+ if (efx->rx_scatter != old_rx_scatter)
+ efx_filter_update_rx_scatter(efx);
/* We must keep at least one descriptor in a TX ring empty.
* We could avoid this when the queue size does not exactly
@@ -661,7 +685,7 @@ static void efx_start_datapath(struct efx_nic *efx)
efx_nic_generate_fill_event(rx_queue);
}
- WARN_ON(channel->rx_pkt != NULL);
+ WARN_ON(channel->rx_pkt_n_frags);
}
if (netif_device_present(efx->net_dev))
diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index 00e7077fa1d8..211da79a65e8 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -39,16 +39,14 @@ extern void efx_init_rx_queue(struct efx_rx_queue *rx_queue);
extern void efx_fini_rx_queue(struct efx_rx_queue *rx_queue);
extern void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue);
extern void efx_rx_slow_fill(unsigned long context);
-extern void __efx_rx_packet(struct efx_channel *channel,
- struct efx_rx_buffer *rx_buf);
-extern void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
+extern void __efx_rx_packet(struct efx_channel *channel);
+extern void efx_rx_packet(struct efx_rx_queue *rx_queue,
+ unsigned int index, unsigned int n_frags,
unsigned int len, u16 flags);
static inline void efx_rx_flush_packet(struct efx_channel *channel)
{
- if (channel->rx_pkt) {
- __efx_rx_packet(channel, channel->rx_pkt);
- channel->rx_pkt = NULL;
- }
+ if (channel->rx_pkt_n_frags)
+ __efx_rx_packet(channel);
}
extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
@@ -73,6 +71,7 @@ extern void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
extern int efx_probe_filters(struct efx_nic *efx);
extern void efx_restore_filters(struct efx_nic *efx);
extern void efx_remove_filters(struct efx_nic *efx);
+extern void efx_filter_update_rx_scatter(struct efx_nic *efx);
extern s32 efx_filter_insert_filter(struct efx_nic *efx,
struct efx_filter_spec *spec,
bool replace);
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 8e61cd06f66a..6e768175e7e0 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -154,6 +154,7 @@ static const struct efx_ethtool_stat efx_ethtool_stats[] = {
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch),
EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
+ EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_nodesc_trunc),
};
/* Number of ethtool statistics */
@@ -978,7 +979,8 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx,
rule->m_ext.data[1]))
return -EINVAL;
- efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL, 0,
+ efx_filter_init_rx(&spec, EFX_FILTER_PRI_MANUAL,
+ efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0,
(rule->ring_cookie == RX_CLS_FLOW_DISC) ?
0xfff : rule->ring_cookie);
diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c
index 49bcd196e10d..4486102fa9b3 100644
--- a/drivers/net/ethernet/sfc/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon.c
@@ -1546,10 +1546,6 @@ static int falcon_probe_nic(struct efx_nic *efx)
static void falcon_init_rx_cfg(struct efx_nic *efx)
{
- /* Prior to Siena the RX DMA engine will split each frame at
- * intervals of RX_USR_BUF_SIZE (32-byte units). We set it to
- * be so large that that never happens. */
- const unsigned huge_buf_size = (3 * 4096) >> 5;
/* RX control FIFO thresholds (32 entries) */
const unsigned ctrl_xon_thr = 20;
const unsigned ctrl_xoff_thr = 25;
@@ -1557,10 +1553,15 @@ static void falcon_init_rx_cfg(struct efx_nic *efx)
efx_reado(efx, &reg, FR_AZ_RX_CFG);
if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1) {
- /* Data FIFO size is 5.5K */
+ /* Data FIFO size is 5.5K. The RX DMA engine only
+ * supports scattering for user-mode queues, but will
+ * split DMA writes at intervals of RX_USR_BUF_SIZE
+ * (32-byte units) even for kernel-mode queues. We
+ * set it to be so large that that never happens.
+ */
EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_DESC_PUSH_EN, 0);
EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_USR_BUF_SIZE,
- huge_buf_size);
+ (3 * 4096) >> 5);
EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_MAC_TH, 512 >> 8);
EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_MAC_TH, 2048 >> 8);
EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_TX_TH, ctrl_xon_thr);
@@ -1569,7 +1570,7 @@ static void falcon_init_rx_cfg(struct efx_nic *efx)
/* Data FIFO size is 80K; register fields moved */
EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_DESC_PUSH_EN, 0);
EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_USR_BUF_SIZE,
- huge_buf_size);
+ EFX_RX_USR_BUF_SIZE >> 5);
/* Send XON and XOFF at ~3 * max MTU away from empty/full */
EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_MAC_TH, 27648 >> 8);
EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_MAC_TH, 54272 >> 8);
@@ -1815,6 +1816,7 @@ const struct efx_nic_type falcon_a1_nic_type = {
.evq_rptr_tbl_base = FR_AA_EVQ_RPTR_KER,
.max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
.rx_buffer_padding = 0x24,
+ .can_rx_scatter = false,
.max_interrupt_mode = EFX_INT_MODE_MSI,
.phys_addr_channels = 4,
.timer_period_max = 1 << FRF_AB_TC_TIMER_VAL_WIDTH,
@@ -1865,6 +1867,7 @@ const struct efx_nic_type falcon_b0_nic_type = {
.max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
.rx_buffer_hash_size = 0x10,
.rx_buffer_padding = 0,
+ .can_rx_scatter = true,
.max_interrupt_mode = EFX_INT_MODE_MSIX,
.phys_addr_channels = 32, /* Hardware limit is 64, but the legacy
* interrupt handler only supports 32
diff --git a/drivers/net/ethernet/sfc/filter.c b/drivers/net/ethernet/sfc/filter.c
index 61b4408bbdb8..2397f0e8d3eb 100644
--- a/drivers/net/ethernet/sfc/filter.c
+++ b/drivers/net/ethernet/sfc/filter.c
@@ -172,6 +172,25 @@ static void efx_filter_push_rx_config(struct efx_nic *efx)
filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED,
!!(table->spec[EFX_FILTER_INDEX_MC_DEF].flags &
EFX_FILTER_FLAG_RX_RSS));
+
+ /* There is a single bit to enable RX scatter for all
+ * unmatched packets. Only set it if scatter is
+ * enabled in both filter specs.
+ */
+ EFX_SET_OWORD_FIELD(
+ filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
+ !!(table->spec[EFX_FILTER_INDEX_UC_DEF].flags &
+ table->spec[EFX_FILTER_INDEX_MC_DEF].flags &
+ EFX_FILTER_FLAG_RX_SCATTER));
+ } else if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
+ /* We don't expose 'default' filters because unmatched
+ * packets always go to the queue number found in the
+ * RSS table. But we still need to set the RX scatter
+ * bit here.
+ */
+ EFX_SET_OWORD_FIELD(
+ filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
+ efx->rx_scatter);
}
efx_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
@@ -413,13 +432,18 @@ static void efx_filter_reset_rx_def(struct efx_nic *efx, unsigned filter_idx)
struct efx_filter_state *state = efx->filter_state;
struct efx_filter_table *table = &state->table[EFX_FILTER_TABLE_RX_DEF];
struct efx_filter_spec *spec = &table->spec[filter_idx];
+ enum efx_filter_flags flags = 0;
/* If there's only one channel then disable RSS for non VF
* traffic, thereby allowing VFs to use RSS when the PF can't.
*/
- efx_filter_init_rx(spec, EFX_FILTER_PRI_MANUAL,
- efx->n_rx_channels > 1 ? EFX_FILTER_FLAG_RX_RSS : 0,
- 0);
+ if (efx->n_rx_channels > 1)
+ flags |= EFX_FILTER_FLAG_RX_RSS;
+
+ if (efx->rx_scatter)
+ flags |= EFX_FILTER_FLAG_RX_SCATTER;
+
+ efx_filter_init_rx(spec, EFX_FILTER_PRI_MANUAL, flags, 0);
spec->type = EFX_FILTER_UC_DEF + filter_idx;
table->used_bitmap[0] |= 1 << filter_idx;
}
@@ -1101,6 +1125,50 @@ void efx_remove_filters(struct efx_nic *efx)
kfree(state);
}
+/* Update scatter enable flags for filters pointing to our own RX queues.
+ *
+ * efx_start_datapath() calls this when efx->rx_scatter differs from the
+ * value it had on entry.  With state->lock held, every table from
+ * EFX_FILTER_TABLE_RX_IP up to and including EFX_FILTER_TABLE_RX_DEF is
+ * walked.  Each in-use entry whose dmaq_id is below efx->n_rx_channels has
+ * EFX_FILTER_FLAG_RX_SCATTER set or cleared to match efx->rx_scatter.
+ * Entries outside the RX_DEF table are then rebuilt and written to their
+ * slot (table->offset + table->step * filter_idx).  RX_DEF entries only
+ * have the stored spec changed here; efx_filter_push_rx_config() is
+ * called once after the walk, before the lock is released.
+ */
+void efx_filter_update_rx_scatter(struct efx_nic *efx)
+{
+ struct efx_filter_state *state = efx->filter_state;
+ enum efx_filter_table_id table_id;
+ struct efx_filter_table *table;
+ efx_oword_t filter;
+ unsigned int filter_idx;
+
+ spin_lock_bh(&state->lock);
+
+ for (table_id = EFX_FILTER_TABLE_RX_IP;
+ table_id <= EFX_FILTER_TABLE_RX_DEF;
+ table_id++) {
+ table = &state->table[table_id];
+
+ for (filter_idx = 0; filter_idx < table->size; filter_idx++) {
+ if (!test_bit(filter_idx, table->used_bitmap) ||
+ table->spec[filter_idx].dmaq_id >=
+ efx->n_rx_channels)
+ continue; /* slot unused, or dmaq_id not below n_rx_channels */
+
+ if (efx->rx_scatter)
+ table->spec[filter_idx].flags |=
+ EFX_FILTER_FLAG_RX_SCATTER;
+ else
+ table->spec[filter_idx].flags &=
+ ~EFX_FILTER_FLAG_RX_SCATTER;
+
+ if (table_id == EFX_FILTER_TABLE_RX_DEF)
+ /* Pushed by efx_filter_push_rx_config() */
+ continue;
+
+ efx_filter_build(&filter, &table->spec[filter_idx]);
+ efx_writeo(efx, &filter,
+ table->offset + table->step * filter_idx);
+ }
+ }
+
+ efx_filter_push_rx_config(efx); /* still under state->lock */
+
+ spin_unlock_bh(&state->lock);
+}
+
#ifdef CONFIG_RFS_ACCEL
int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 1bc911f980b5..e41b54bada7c 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -69,6 +69,12 @@
#define EFX_TXQ_TYPES 4
#define EFX_MAX_TX_QUEUES (EFX_TXQ_TYPES * EFX_MAX_CHANNELS)
+/* Maximum possible MTU the driver supports */
+#define EFX_MAX_MTU (9 * 1024)
+
+/* Size of an RX scatter buffer. Small enough to pack 2 into a 4K page. */
+#define EFX_RX_USR_BUF_SIZE 1824
+
/* Forward declare Precision Time Protocol (PTP) support structure. */
struct efx_ptp_data;
@@ -212,7 +218,8 @@ struct efx_tx_queue {
* If completed: offset in @page of Ethernet header.
* @len: If pending: length for DMA descriptor.
* If completed: received length, excluding hash prefix.
- * @flags: Flags for buffer and packet state.
+ * @flags: Flags for buffer and packet state. These are only set on the
+ * first buffer of a scattered packet.
*/
struct efx_rx_buffer {
dma_addr_t dma_addr;
@@ -256,6 +263,7 @@ struct efx_rx_page_state {
* @added_count: Number of buffers added to the receive queue.
* @notified_count: Number of buffers given to NIC (<= @added_count).
* @removed_count: Number of buffers removed from the receive queue.
+ * @scatter_n: Number of buffers used by current packet
* @max_fill: RX descriptor maximum fill level (<= ring size)
* @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill
* (<= @max_fill)
@@ -276,6 +284,7 @@ struct efx_rx_queue {
unsigned int added_count;
unsigned int notified_count;
unsigned int removed_count;
+ unsigned int scatter_n;
unsigned int max_fill;
unsigned int fast_fill_trigger;
unsigned int min_fill;
@@ -335,6 +344,12 @@ enum efx_rx_alloc_method {
* @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors
* @n_rx_overlength: Count of RX_OVERLENGTH errors
* @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun
+ * @n_rx_nodesc_trunc: Number of RX packets truncated and then dropped due to
+ * lack of descriptors
+ * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by
+ * __efx_rx_packet(), or zero if there is none
+ * @rx_pkt_index: Ring index of first buffer for next packet to be delivered
+ * by __efx_rx_packet(), if @rx_pkt_n_frags != 0
* @rx_queue: RX queue for this channel
* @tx_queue: TX queues for this channel
*/
@@ -366,11 +381,10 @@ struct efx_channel {
unsigned n_rx_frm_trunc;
unsigned n_rx_overlength;
unsigned n_skbuff_leaks;
+ unsigned int n_rx_nodesc_trunc;
- /* Used to pipeline received packets in order to optimise memory
- * access with prefetches.
- */
- struct efx_rx_buffer *rx_pkt;
+ unsigned int rx_pkt_n_frags;
+ unsigned int rx_pkt_index;
struct efx_rx_queue rx_queue;
struct efx_tx_queue tx_queue[EFX_TXQ_TYPES];
@@ -672,8 +686,11 @@ struct vfdi_status;
* @n_tx_channels: Number of channels used for TX
* @rx_dma_len: Current maximum RX DMA length
* @rx_buffer_order: Order (log2) of number of pages for each RX buffer
+ * @rx_buffer_truesize: Amortised allocation size of an RX buffer,
+ * for use in sk_buff::truesize
* @rx_hash_key: Toeplitz hash key for RSS
* @rx_indir_table: Indirection table for RSS
+ * @rx_scatter: Scatter mode enabled for receives
* @int_error_count: Number of internal errors seen recently
* @int_error_expire: Time at which error count will be expired
* @irq_status: Interrupt status buffer
@@ -788,8 +805,10 @@ struct efx_nic {
unsigned n_tx_channels;
unsigned int rx_dma_len;
unsigned int rx_buffer_order;
+ unsigned int rx_buffer_truesize;
u8 rx_hash_key[40];
u32 rx_indir_table[128];
+ bool rx_scatter;
unsigned int_error_count;
unsigned long int_error_expire;
@@ -920,8 +939,9 @@ static inline unsigned int efx_port_num(struct efx_nic *efx)
* @evq_ptr_tbl_base: Event queue pointer table base address
* @evq_rptr_tbl_base: Event queue read-pointer table base address
* @max_dma_mask: Maximum possible DMA mask
- * @rx_buffer_hash_size: Size of hash at start of RX buffer
- * @rx_buffer_padding: Size of padding at end of RX buffer
+ * @rx_buffer_hash_size: Size of hash at start of RX packet
+ * @rx_buffer_padding: Size of padding at end of RX packet
+ * @can_rx_scatter: NIC is able to scatter packet to multiple buffers
* @max_interrupt_mode: Highest capability interrupt mode supported
* from &enum efx_init_mode.
* @phys_addr_channels: Number of channels with physically addressed
@@ -969,6 +989,7 @@ struct efx_nic_type {
u64 max_dma_mask;
unsigned int rx_buffer_hash_size;
unsigned int rx_buffer_padding;
+ bool can_rx_scatter;
unsigned int max_interrupt_mode;
unsigned int phys_addr_channels;
unsigned int timer_period_max;
diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c
index 0ad790cc473c..f9f5df8b51fe 100644
--- a/drivers/net/ethernet/sfc/nic.c
+++ b/drivers/net/ethernet/sfc/nic.c
@@ -591,12 +591,22 @@ void efx_nic_init_rx(struct efx_rx_queue *rx_queue)
struct efx_nic *efx = rx_queue->efx;
bool is_b0 = efx_nic_rev(efx) >= EFX_REV_FALCON_B0;
bool iscsi_digest_en = is_b0;
+ bool jumbo_en;
+
+ /* For kernel-mode queues in Falcon A1, the JUMBO flag enables
+ * DMA to continue after a PCIe page boundary (and scattering
+ * is not possible). In Falcon B0 and Siena, it enables
+ * scatter.
+ */
+ jumbo_en = !is_b0 || efx->rx_scatter;
netif_dbg(efx, hw, efx->net_dev,
"RX queue %d ring in special buffers %d-%d\n",
efx_rx_queue_index(rx_queue), rx_queue->rxd.index,
rx_queue->rxd.index + rx_queue->rxd.entries - 1);
+ rx_queue->scatter_n = 0;
+
/* Pin RX descriptor ring */
efx_init_special_buffer(efx, &rx_queue->rxd);
@@ -613,8 +623,7 @@ void efx_nic_init_rx(struct efx_rx_queue *rx_queue)
FRF_AZ_RX_DESCQ_SIZE,
__ffs(rx_queue->rxd.entries),
FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ ,
- /* For >=B0 this is scatter so disable */
- FRF_AZ_RX_DESCQ_JUMBO, !is_b0,
+ FRF_AZ_RX_DESCQ_JUMBO, jumbo_en,
FRF_AZ_RX_DESCQ_EN, 1);
efx_writeo_table(efx, &rx_desc_ptr, efx->type->rxd_ptr_tbl_base,
efx_rx_queue_index(rx_queue));
@@ -968,13 +977,24 @@ static u16 efx_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
EFX_RX_PKT_DISCARD : 0;
}
-/* Handle receive events that are not in-order. */
-static void
+/* Handle receive events that are not in-order. Return true if this
+ * can be handled as a partial packet discard, false if it's more
+ * serious.
+ */
+static bool
efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index)
{
+ struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
struct efx_nic *efx = rx_queue->efx;
unsigned expected, dropped;
+ if (rx_queue->scatter_n &&
+ index == ((rx_queue->removed_count + rx_queue->scatter_n - 1) &
+ rx_queue->ptr_mask)) {
+ ++channel->n_rx_nodesc_trunc;
+ return true;
+ }
+
expected = rx_queue->removed_count & rx_queue->ptr_mask;
dropped = (index - expected) & rx_queue->ptr_mask;
netif_info(efx, rx_err, efx->net_dev,
@@ -983,6 +1003,7 @@ efx_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index)
efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ?
RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE);
+ return false;
}
/* Handle a packet received event
@@ -998,7 +1019,7 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt;
unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt;
unsigned expected_ptr;
- bool rx_ev_pkt_ok;
+ bool rx_ev_pkt_ok, rx_ev_sop, rx_ev_cont;
u16 flags;
struct efx_rx_queue *rx_queue;
struct efx_nic *efx = channel->efx;
@@ -1006,21 +1027,56 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
if (unlikely(ACCESS_ONCE(efx->reset_pending)))
return;
- /* Basic packet information */
- rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT);
- rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK);
- rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
- WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT));
- WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP) != 1);
+ rx_ev_cont = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT);
+ rx_ev_sop = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP);
WARN_ON(EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) !=
channel->channel);
rx_queue = efx_channel_get_rx_queue(channel);
rx_ev_desc_ptr = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR);
- expected_ptr = rx_queue->removed_count & rx_queue->ptr_mask;
- if (unlikely(rx_ev_desc_ptr != expected_ptr))
- efx_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr);
+ expected_ptr = ((rx_queue->removed_count + rx_queue->scatter_n) &
+ rx_queue->ptr_mask);
+
+ /* Check for partial drops and other errors */
+ if (unlikely(rx_ev_desc_ptr != expected_ptr) ||
+ unlikely(rx_ev_sop != (rx_queue->scatter_n == 0))) {
+ if (rx_ev_desc_ptr != expected_ptr &&
+ !efx_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr))
+ return;
+
+ /* Discard all pending fragments */
+ if (rx_queue->scatter_n) {
+ efx_rx_packet(
+ rx_queue,
+ rx_queue->removed_count & rx_queue->ptr_mask,
+ rx_queue->scatter_n, 0, EFX_RX_PKT_DISCARD);
+ rx_queue->removed_count += rx_queue->scatter_n;
+ rx_queue->scatter_n = 0;
+ }
+
+ /* Return if there is no new fragment */
+ if (rx_ev_desc_ptr != expected_ptr)
+ return;
+
+ /* Discard new fragment if not SOP */
+ if (!rx_ev_sop) {
+ efx_rx_packet(
+ rx_queue,
+ rx_queue->removed_count & rx_queue->ptr_mask,
+ 1, 0, EFX_RX_PKT_DISCARD);
+ ++rx_queue->removed_count;
+ return;
+ }
+ }
+
+ ++rx_queue->scatter_n;
+ if (rx_ev_cont)
+ return;
+
+ rx_ev_byte_cnt = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT);
+ rx_ev_pkt_ok = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK);
+ rx_ev_hdr_type = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
if (likely(rx_ev_pkt_ok)) {
/* If packet is marked as OK and packet type is TCP/IP or
@@ -1048,7 +1104,11 @@ efx_handle_rx_event(struct efx_channel *channel, const efx_qword_t *event)
channel->irq_mod_score += 2;
/* Handle received packet */
- efx_rx_packet(rx_queue, rx_ev_desc_ptr, rx_ev_byte_cnt, flags);
+ efx_rx_packet(rx_queue,
+ rx_queue->removed_count & rx_queue->ptr_mask,
+ rx_queue->scatter_n, rx_ev_byte_cnt, flags);
+ rx_queue->removed_count += rx_queue->scatter_n;
+ rx_queue->scatter_n = 0;
}
/* If this flush done event corresponds to a &struct efx_tx_queue, then
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 04518722ac1d..88aa1ff01e3f 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -39,13 +39,17 @@
*/
static unsigned int rx_refill_threshold;
+/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
+#define EFX_RX_MAX_FRAGS DIV_ROUND_UP(EFX_MAX_FRAME_LEN(EFX_MAX_MTU), \
+ EFX_RX_USR_BUF_SIZE)
+
/*
* RX maximum head room required.
*
- * This must be at least 1 to prevent overflow and at least 2 to allow
- * pipelined receives.
+ * This must be at least 1 to prevent overflow, plus one packet-worth
+ * to allow pipelined receives.
*/
-#define EFX_RXD_HEAD_ROOM 2
+#define EFX_RXD_HEAD_ROOM (1 + EFX_RX_MAX_FRAGS)
static inline u8 *efx_rx_buf_va(struct efx_rx_buffer *buf)
{
@@ -66,6 +70,15 @@ static inline u32 efx_rx_buf_hash(const u8 *eh)
#endif
}
+static inline struct efx_rx_buffer *
+efx_rx_buf_next(struct efx_rx_queue *rx_queue, struct efx_rx_buffer *rx_buf)
+{ /* Return the buffer entry after @rx_buf, wrapping from index ptr_mask to 0 */
+ if (unlikely(rx_buf == efx_rx_buffer(rx_queue, rx_queue->ptr_mask)))
+ return efx_rx_buffer(rx_queue, 0); /* @rx_buf is the entry at index ptr_mask */
+ else
+ return rx_buf + 1; /* otherwise the element immediately after @rx_buf */
+}
+
/**
* efx_init_rx_buffers - create EFX_RX_BATCH page-based RX buffers
*
@@ -199,28 +212,34 @@ static void efx_resurrect_rx_buffer(struct efx_rx_queue *rx_queue,
++rx_queue->added_count;
}
-/* Recycle the given rx buffer directly back into the rx_queue. There is
- * always room to add this buffer, because we've just popped a buffer. */
-static void efx_recycle_rx_buffer(struct efx_channel *channel,
- struct efx_rx_buffer *rx_buf)
+/* Recycle buffers directly back into the rx_queue. There is always
+ * room to add these buffers, because we've just popped them.
+ */
+static void efx_recycle_rx_buffers(struct efx_channel *channel,
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags)
{
struct efx_nic *efx = channel->efx;
struct efx_rx_queue *rx_queue = efx_channel_get_rx_queue(channel);
struct efx_rx_buffer *new_buf;
unsigned index;
- rx_buf->flags = 0;
+ do {
+ rx_buf->flags = 0;
- if (efx->rx_dma_len <= EFX_RX_HALF_PAGE &&
- page_count(rx_buf->page) == 1)
- efx_resurrect_rx_buffer(rx_queue, rx_buf);
+ if (efx->rx_dma_len <= EFX_RX_HALF_PAGE &&
+ page_count(rx_buf->page) == 1)
+ efx_resurrect_rx_buffer(rx_queue, rx_buf);
- index = rx_queue->added_count & rx_queue->ptr_mask;
- new_buf = efx_rx_buffer(rx_queue, index);
+ index = rx_queue->added_count & rx_queue->ptr_mask;
+ new_buf = efx_rx_buffer(rx_queue, index);
- memcpy(new_buf, rx_buf, sizeof(*new_buf));
- rx_buf->page = NULL;
- ++rx_queue->added_count;
+ memcpy(new_buf, rx_buf, sizeof(*new_buf));
+ rx_buf->page = NULL;
+ ++rx_queue->added_count;
+
+ rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+ } while (--n_frags);
}
/**
@@ -328,46 +347,56 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
/* Pass a received packet up through GRO. GRO can handle pages
* regardless of checksum state and skbs with a good checksum.
*/
-static void efx_rx_packet_gro(struct efx_channel *channel,
- struct efx_rx_buffer *rx_buf,
- const u8 *eh)
+static void
+efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags, u8 *eh)
{
struct napi_struct *napi = &channel->napi_str;
gro_result_t gro_result;
struct efx_nic *efx = channel->efx;
- struct page *page = rx_buf->page;
struct sk_buff *skb;
- rx_buf->page = NULL;
-
skb = napi_get_frags(napi);
- if (!skb) {
- put_page(page);
+ if (unlikely(!skb)) {
+ while (n_frags--) {
+ put_page(rx_buf->page);
+ rx_buf->page = NULL;
+ rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
+ }
return;
}
if (efx->net_dev->features & NETIF_F_RXHASH)
skb->rxhash = efx_rx_buf_hash(eh);
-
- skb_fill_page_desc(skb, 0, page, rx_buf->page_offset, rx_buf->len);
-
- skb->len = rx_buf->len;
- skb->data_len = rx_buf->len;
- skb->truesize += rx_buf->len;
skb->ip_summed = ((rx_buf->flags & EFX_RX_PKT_CSUMMED) ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
- skb_record_rx_queue(skb, channel->rx_queue.core_index);
+ for (;;) {
+ skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+ rx_buf->page, rx_buf->page_offset,
+ rx_buf->len);
+ rx_buf->page = NULL;
+ skb->len += rx_buf->len;
+ if (skb_shinfo(skb)->nr_frags == n_frags)
+ break;
- gro_result = napi_gro_frags(napi);
+ rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
+ }
+
+ skb->data_len = skb->len;
+ skb->truesize += n_frags * efx->rx_buffer_truesize;
+
+ skb_record_rx_queue(skb, channel->rx_queue.core_index);
+ gro_result = napi_gro_frags(napi);
if (gro_result != GRO_DROP)
channel->irq_mod_score += 2;
}
-/* Allocate and construct an SKB around a struct page.*/
+/* Allocate and construct an SKB around page fragments */
static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags,
u8 *eh, int hdr_len)
{
struct efx_nic *efx = channel->efx;
@@ -381,25 +410,32 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len);
skb_reserve(skb, EFX_PAGE_SKB_ALIGN);
+ memcpy(__skb_put(skb, hdr_len), eh, hdr_len);
- skb->len = rx_buf->len;
- skb->truesize = rx_buf->len + sizeof(struct sk_buff);
- memcpy(skb->data, eh, hdr_len);
- skb->tail += hdr_len;
-
- /* Append the remaining page onto the frag list */
+ /* Append the remaining page(s) onto the frag list */
if (rx_buf->len > hdr_len) {
- skb->data_len = skb->len - hdr_len;
- skb_fill_page_desc(skb, 0, rx_buf->page,
- rx_buf->page_offset + hdr_len,
- skb->data_len);
+ rx_buf->page_offset += hdr_len;
+ rx_buf->len -= hdr_len;
+
+ for (;;) {
+ skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+ rx_buf->page, rx_buf->page_offset,
+ rx_buf->len);
+ rx_buf->page = NULL;
+ skb->len += rx_buf->len;
+ skb->data_len += rx_buf->len;
+ if (skb_shinfo(skb)->nr_frags == n_frags)
+ break;
+
+ rx_buf = efx_rx_buf_next(&channel->rx_queue, rx_buf);
+ }
} else {
__free_pages(rx_buf->page, efx->rx_buffer_order);
- skb->data_len = 0;
+ rx_buf->page = NULL;
+ n_frags = 0;
}
- /* Ownership has transferred from the rx_buf to skb */
- rx_buf->page = NULL;
+ skb->truesize += n_frags * efx->rx_buffer_truesize;
/* Move past the ethernet header */
skb->protocol = eth_type_trans(skb, efx->net_dev);
@@ -408,7 +444,7 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
}
void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
- unsigned int len, u16 flags)
+ unsigned int n_frags, unsigned int len, u16 flags)
{
struct efx_nic *efx = rx_queue->efx;
struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
@@ -417,35 +453,43 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
rx_buf = efx_rx_buffer(rx_queue, index);
rx_buf->flags |= flags;
- /* This allows the refill path to post another buffer.
- * EFX_RXD_HEAD_ROOM ensures that the slot we are using
- * isn't overwritten yet.
- */
- rx_queue->removed_count++;
-
- /* Validate the length encoded in the event vs the descriptor pushed */
- efx_rx_packet__check_len(rx_queue, rx_buf, len);
+ /* Validate the number of fragments and completed length */
+ if (n_frags == 1) {
+ efx_rx_packet__check_len(rx_queue, rx_buf, len);
+ } else if (unlikely(n_frags > EFX_RX_MAX_FRAGS) ||
+ unlikely(len <= (n_frags - 1) * EFX_RX_USR_BUF_SIZE) ||
+ unlikely(len > n_frags * EFX_RX_USR_BUF_SIZE) ||
+ unlikely(!efx->rx_scatter)) {
+ /* If this isn't an explicit discard request, either
+ * the hardware or the driver is broken.
+ */
+ WARN_ON(!(len == 0 && rx_buf->flags & EFX_RX_PKT_DISCARD));
+ rx_buf->flags |= EFX_RX_PKT_DISCARD;
+ }
netif_vdbg(efx, rx_status, efx->net_dev,
- "RX queue %d received id %x at %llx+%x %s%s\n",
+ "RX queue %d received ids %x-%x len %d %s%s\n",
efx_rx_queue_index(rx_queue), index,
- (unsigned long long)rx_buf->dma_addr, len,
+ (index + n_frags - 1) & rx_queue->ptr_mask, len,
(rx_buf->flags & EFX_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
(rx_buf->flags & EFX_RX_PKT_DISCARD) ? " [DISCARD]" : "");
- /* Discard packet, if instructed to do so */
+ /* Discard packet, if instructed to do so. Process the
+ * previous receive first.
+ */
if (unlikely(rx_buf->flags & EFX_RX_PKT_DISCARD)) {
- efx_recycle_rx_buffer(channel, rx_buf);
-
- /* Don't hold off the previous receive */
- rx_buf = NULL;
- goto out;
+ efx_rx_flush_packet(channel);
+ efx_recycle_rx_buffers(channel, rx_buf, n_frags);
+ return;
}
+ if (n_frags == 1)
+ rx_buf->len = len;
+
/* Release and/or sync DMA mapping - assumes all RX buffers
* consumed in-order per RX queue
*/
- efx_unmap_rx_buffer(efx, rx_buf, len);
+ efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len);
/* Prefetch nice and early so data will (hopefully) be in cache by
* the time we look at it.
@@ -453,23 +497,40 @@ void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
prefetch(efx_rx_buf_va(rx_buf));
rx_buf->page_offset += efx->type->rx_buffer_hash_size;
- rx_buf->len = len - efx->type->rx_buffer_hash_size;
+ rx_buf->len -= efx->type->rx_buffer_hash_size;
+
+ if (n_frags > 1) {
+ /* Release/sync DMA mapping for additional fragments.
+ * Fix length for last fragment.
+ */
+ unsigned int tail_frags = n_frags - 1;
+
+ for (;;) {
+ rx_buf = efx_rx_buf_next(rx_queue, rx_buf);
+ if (--tail_frags == 0)
+ break;
+ efx_unmap_rx_buffer(efx, rx_buf, EFX_RX_USR_BUF_SIZE);
+ }
+ rx_buf->len = len - (n_frags - 1) * EFX_RX_USR_BUF_SIZE;
+ efx_unmap_rx_buffer(efx, rx_buf, rx_buf->len);
+ }
/* Pipeline receives so that we give time for packet headers to be
* prefetched into cache.
*/
-out:
efx_rx_flush_packet(channel);
- channel->rx_pkt = rx_buf;
+ channel->rx_pkt_n_frags = n_frags;
+ channel->rx_pkt_index = index;
}
static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
- struct efx_rx_buffer *rx_buf)
+ struct efx_rx_buffer *rx_buf,
+ unsigned int n_frags)
{
struct sk_buff *skb;
u16 hdr_len = min_t(u16, rx_buf->len, EFX_SKB_HEADERS);
- skb = efx_rx_mk_skb(channel, rx_buf, eh, hdr_len);
+ skb = efx_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len);
if (unlikely(skb == NULL)) {
efx_free_rx_buffer(channel->efx, rx_buf);
return;
@@ -488,9 +549,11 @@ static void efx_rx_deliver(struct efx_channel *channel, u8 *eh,
}
/* Handle a received packet. Second half: Touches packet payload. */
-void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf)
+void __efx_rx_packet(struct efx_channel *channel)
{
struct efx_nic *efx = channel->efx;
+ struct efx_rx_buffer *rx_buf =
+ efx_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
u8 *eh = efx_rx_buf_va(rx_buf);
/* If we're in loopback test, then pass the packet directly to the
@@ -499,16 +562,18 @@ void __efx_rx_packet(struct efx_channel *channel, struct efx_rx_buffer *rx_buf)
if (unlikely(efx->loopback_selftest)) {
efx_loopback_rx_packet(efx, eh, rx_buf->len);
efx_free_rx_buffer(efx, rx_buf);
- return;
+ goto out;
}
if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
rx_buf->flags &= ~EFX_RX_PKT_CSUMMED;
if (!channel->type->receive_skb)
- efx_rx_packet_gro(channel, rx_buf, eh);
+ efx_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
else
- efx_rx_deliver(channel, eh, rx_buf);
+ efx_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
+out:
+ channel->rx_pkt_n_frags = 0;
}
int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index e07ff0d3f26b..51669244d154 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -414,6 +414,8 @@ static int siena_init_nic(struct efx_nic *efx)
EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_INSRT_HDR, 1);
EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_HASH_ALG, 1);
EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_IP_HASH, 1);
+ EFX_SET_OWORD_FIELD(temp, FRF_BZ_RX_USR_BUF_SIZE,
+ EFX_RX_USR_BUF_SIZE >> 5);
efx_writeo(efx, &temp, FR_AZ_RX_CFG);
/* Set hash key for IPv4 */
@@ -718,6 +720,7 @@ const struct efx_nic_type siena_a0_nic_type = {
.max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
.rx_buffer_hash_size = 0x10,
.rx_buffer_padding = 0,
+ .can_rx_scatter = true,
.max_interrupt_mode = EFX_INT_MODE_MSIX,
.phys_addr_channels = 32, /* Hardware limit is 64, but the legacy
* interrupt handler only supports 32