From cd2d5b529cdb9bd274f3e4bc68d37d4d63b7f383 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 14 Feb 2012 00:48:07 +0000 Subject: sfc: Add SR-IOV back-end support for SFC9000 family On the SFC9000 family, each port has 1024 Virtual Interfaces (VIs), each with an RX queue, a TX queue, an event queue and a mailbox register. These may be assigned to up to 127 SR-IOV virtual functions per port, with up to 64 VIs per VF. We allocate an extra channel (IRQ and event queue only) to receive requests from VF drivers. There is a per-port limit of 4 concurrent RX queue flushes, and queue flushes may be initiated by the MC in response to a Function Level Reset (FLR) of a VF. Therefore, when SR-IOV is in use, we submit all flush requests via the MC. The RSS indirection table is shared with VFs, so the number of RX queues used in the PF is limited to the number of VIs per VF. This is almost entirely the work of Steve Hodgson, formerly shodgson@solarflare.com. Signed-off-by: Ben Hutchings --- drivers/net/ethernet/sfc/nic.c | 79 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 71 insertions(+), 8 deletions(-) (limited to 'drivers/net/ethernet/sfc/nic.c') diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 747cf9439164..2bf4283f05fe 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -264,6 +264,10 @@ static int efx_alloc_special_buffer(struct efx_nic *efx, /* Select new buffer ID */ buffer->index = efx->next_buffer_table; efx->next_buffer_table += buffer->entries; +#ifdef CONFIG_SFC_SRIOV + BUG_ON(efx_sriov_enabled(efx) && + efx->vf_buftbl_base < efx->next_buffer_table); +#endif netif_dbg(efx, probe, efx->net_dev, "allocating special buffers %d-%d at %llx+%x " @@ -693,6 +697,16 @@ int efx_nic_flush_queues(struct efx_nic *efx) } while (timeout && atomic_read(&efx->drain_pending) > 0) { + /* If SRIOV is enabled, then offload receive queue flushing to + * the firmware (though we will still have to poll for + * completion). If that fails, fall back to the old scheme. + */ + if (efx_sriov_enabled(efx)) { + rc = efx_mcdi_flush_rxqs(efx); + if (!rc) + goto wait; + } + /* The hardware supports four concurrent rx flushes, each of * which may need to be retried if there is an outstanding * descriptor fetch @@ -712,6 +726,7 @@ int efx_nic_flush_queues(struct efx_nic *efx) } } + wait: timeout = wait_event_timeout(efx->flush_wq, efx_flush_wake(efx), timeout); } @@ -1102,11 +1117,13 @@ efx_handle_driver_event(struct efx_channel *channel, efx_qword_t *event) netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n", channel->channel, ev_sub_data); efx_handle_tx_flush_done(efx, event); + efx_sriov_tx_flush_done(efx, event); break; case FSE_AZ_RX_DESCQ_FLS_DONE_EV: netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n", channel->channel, ev_sub_data); efx_handle_rx_flush_done(efx, event); + efx_sriov_rx_flush_done(efx, event); break; case FSE_AZ_EVQ_INIT_DONE_EV: netif_dbg(efx, hw, efx->net_dev, @@ -1138,16 +1155,24 @@ efx_handle_driver_event(struct efx_channel *channel, efx_qword_t *event) RESET_TYPE_DISABLE); break; case FSE_BZ_RX_DSC_ERROR_EV: - netif_err(efx, rx_err, efx->net_dev, - "RX DMA Q %d reports descriptor fetch error." - " RX Q %d is disabled.\n", ev_sub_data, ev_sub_data); - efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH); + if (ev_sub_data < EFX_VI_BASE) { + netif_err(efx, rx_err, efx->net_dev, + "RX DMA Q %d reports descriptor fetch error." + " RX Q %d is disabled.\n", ev_sub_data, + ev_sub_data); + efx_schedule_reset(efx, RESET_TYPE_RX_DESC_FETCH); + } else + efx_sriov_desc_fetch_err(efx, ev_sub_data); break; case FSE_BZ_TX_DSC_ERROR_EV: - netif_err(efx, tx_err, efx->net_dev, - "TX DMA Q %d reports descriptor fetch error." - " TX Q %d is disabled.\n", ev_sub_data, ev_sub_data); - efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH); + if (ev_sub_data < EFX_VI_BASE) { + netif_err(efx, tx_err, efx->net_dev, + "TX DMA Q %d reports descriptor fetch error." + " TX Q %d is disabled.\n", ev_sub_data, + ev_sub_data); + efx_schedule_reset(efx, RESET_TYPE_TX_DESC_FETCH); + } else + efx_sriov_desc_fetch_err(efx, ev_sub_data); break; default: netif_vdbg(efx, hw, efx->net_dev, @@ -1207,6 +1232,9 @@ int efx_nic_process_eventq(struct efx_channel *channel, int budget) case FSE_AZ_EV_CODE_DRIVER_EV: efx_handle_driver_event(channel, &event); break; + case FSE_CZ_EV_CODE_USER_EV: + efx_sriov_event(channel, &event); + break; case FSE_CZ_EV_CODE_MCDI_EV: efx_mcdi_process_event(channel, &event); break; @@ -1609,6 +1637,15 @@ void efx_nic_fini_interrupt(struct efx_nic *efx) free_irq(efx->legacy_irq, efx); } +/* Looks at available SRAM resources and works out how many queues we + * can support, and where things like descriptor caches should live. + * + * SRAM is split up as follows: + * 0 buftbl entries for channels + * efx->vf_buftbl_base buftbl entries for SR-IOV + * efx->rx_dc_base RX descriptor caches + * efx->tx_dc_base TX descriptor caches + */ void efx_nic_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw) { unsigned vi_count, buftbl_min; @@ -1622,6 +1659,32 @@ void efx_nic_dimension_resources(struct efx_nic *efx, unsigned sram_lim_qw) * sizeof(efx_qword_t) / EFX_BUF_SIZE); vi_count = max(efx->n_channels, efx->n_tx_channels * EFX_TXQ_TYPES); +#ifdef CONFIG_SFC_SRIOV + if (efx_sriov_wanted(efx)) { + unsigned vi_dc_entries, buftbl_free, entries_per_vf, vf_limit; + + efx->vf_buftbl_base = buftbl_min; + + vi_dc_entries = RX_DC_ENTRIES + TX_DC_ENTRIES; + vi_count = max(vi_count, EFX_VI_BASE); + buftbl_free = (sram_lim_qw - buftbl_min - + vi_count * vi_dc_entries); + + entries_per_vf = ((vi_dc_entries + EFX_VF_BUFTBL_PER_VI) * + efx_vf_size(efx)); + vf_limit = min(buftbl_free / entries_per_vf, + (1024U - EFX_VI_BASE) >> efx->vi_scale); + + if (efx->vf_count > vf_limit) { + netif_err(efx, probe, efx->net_dev, + "Reducing VF count from from %d to %d\n", + efx->vf_count, vf_limit); + efx->vf_count = vf_limit; + } + vi_count += efx->vf_count * efx_vf_size(efx); + } +#endif + efx->tx_dc_base = sram_lim_qw - vi_count * TX_DC_ENTRIES; efx->rx_dc_base = efx->tx_dc_base - vi_count * RX_DC_ENTRIES; } -- cgit v1.2.3