diff options
author | David S. Miller <davem@davemloft.net> | 2018-03-27 13:33:21 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-03-27 13:33:21 -0400 |
commit | 5d22d47b9ed96eddb35821dc2cc4f629f45827f7 (patch) | |
tree | a5e601f614c88f93b9b7919153cdbd70d66ab8c5 | |
parent | c709002c23f91d90eb6ee2d4efbb548a8fe3cc80 (diff) | |
parent | a8e8fbebde5ded18e94c36220397521021d941ce (diff) | |
download | linux-5d22d47b9ed96eddb35821dc2cc4f629f45827f7.tar.bz2 |
Merge branch 'sfc-filter-locking'
Edward Cree says:
====================
sfc: rework locking around filter management
The use of a spinlock to protect filter state combined with the need for a
sleeping operation (MCDI) to apply that state to the NIC (on EF10) led to
unfixable race conditions, around the handling of filter restoration after
an MC reboot.
So, this patch series removes the requirement to be able to modify the SW
filter table from atomic context, by using a workqueue to request
asynchronous filter operations (which are needed for ARFS). Then, the
filter table locks are changed to mutexes, replacing the dance of spinlocks
and 'busy' flags. Also, a mutex is added to protect the RSS context state,
since otherwise a similar race is possible around restoring that after an
MC reboot. While we're at it, fix a couple of other related bugs.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/sfc/ef10.c | 545 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/efx.c | 30 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/efx.h | 11 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/ethtool.c | 77 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/farch.c | 80 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/net_driver.h | 16 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/nic.h | 5 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/rx.c | 119 | ||||
-rw-r--r-- | drivers/net/ethernet/sfc/siena.c | 1 |
9 files changed, 400 insertions, 484 deletions
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index e100273b623d..c4c45c94da77 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -96,17 +96,15 @@ struct efx_ef10_filter_table { MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM * 2]; unsigned int rx_match_count; + struct rw_semaphore lock; /* Protects entries */ struct { unsigned long spec; /* pointer to spec plus flag bits */ -/* BUSY flag indicates that an update is in progress. AUTO_OLD is - * used to mark and sweep MAC filters for the device address lists. - */ -#define EFX_EF10_FILTER_FLAG_BUSY 1UL +/* AUTO_OLD is used to mark and sweep MAC filters for the device address lists. */ +/* unused flag 1UL */ #define EFX_EF10_FILTER_FLAG_AUTO_OLD 2UL #define EFX_EF10_FILTER_FLAGS 3UL u64 handle; /* firmware handle */ } *entry; - wait_queue_head_t waitq; /* Shadow of net_device address lists, guarded by mac_lock */ struct efx_ef10_dev_addr dev_uc_list[EFX_EF10_FILTER_DEV_UC_MAX]; struct efx_ef10_dev_addr dev_mc_list[EFX_EF10_FILTER_DEV_MC_MAX]; @@ -1501,6 +1499,7 @@ static void efx_ef10_reset_mc_allocations(struct efx_nic *efx) /* All our allocations have been reset */ nic_data->must_realloc_vis = true; + nic_data->must_restore_rss_contexts = true; nic_data->must_restore_filters = true; nic_data->must_restore_piobufs = true; efx_ef10_forget_old_piobufs(efx); @@ -2901,6 +2900,8 @@ static int efx_ef10_rx_push_rss_context_config(struct efx_nic *efx, { int rc; + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) { rc = efx_ef10_alloc_rss_context(efx, true, ctx, NULL); if (rc) @@ -2931,6 +2932,8 @@ static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx, size_t outlen; int rc, i; + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN != MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN); @@ -2974,14 +2977,25 @@ static int efx_ef10_rx_pull_rss_context_config(struct efx_nic *efx, static int efx_ef10_rx_pull_rss_config(struct efx_nic *efx) { - return efx_ef10_rx_pull_rss_context_config(efx, &efx->rss_context); + int rc; + + mutex_lock(&efx->rss_lock); + rc = efx_ef10_rx_pull_rss_context_config(efx, &efx->rss_context); + mutex_unlock(&efx->rss_lock); + return rc; } static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx) { + struct efx_ef10_nic_data *nic_data = efx->nic_data; struct efx_rss_context *ctx; int rc; + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + + if (!nic_data->must_restore_rss_contexts) + return; + list_for_each_entry(ctx, &efx->rss_context.list, list) { /* previous NIC RSS context is gone */ ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID; @@ -2995,6 +3009,7 @@ static void efx_ef10_rx_restore_rss_contexts(struct efx_nic *efx) "; RSS filters may fail to be applied\n", ctx->user_id, rc); } + nic_data->must_restore_rss_contexts = false; } static int efx_ef10_pf_rx_push_rss_config(struct efx_nic *efx, bool user, @@ -4302,26 +4317,35 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx, struct efx_filter_spec *spec, bool replace_equal) { - struct efx_ef10_filter_table *table = efx->filter_state; DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); + struct efx_ef10_nic_data *nic_data = efx->nic_data; + struct efx_ef10_filter_table *table; struct efx_filter_spec *saved_spec; struct efx_rss_context *ctx = NULL; unsigned int match_pri, hash; unsigned int priv_flags; + bool rss_locked = false; bool replacing = false; + unsigned int depth, i; int ins_index = -1; DEFINE_WAIT(wait); bool is_mc_recip; s32 rc; + down_read(&efx->filter_sem); + table = efx->filter_state; + down_write(&table->lock); + /* For now, only support RX filters */ if ((spec->flags & (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_TX)) != - EFX_FILTER_FLAG_RX) - return -EINVAL; + EFX_FILTER_FLAG_RX) { + rc = -EINVAL; + goto out_unlock; + } rc = efx_ef10_filter_pri(table, spec); if (rc < 0) - return rc; + goto out_unlock; match_pri = rc; hash = efx_ef10_filter_hash(spec); @@ -4330,91 +4354,70 @@ static s32 efx_ef10_filter_insert(struct efx_nic *efx, bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { + mutex_lock(&efx->rss_lock); + rss_locked = true; if (spec->rss_context) - ctx = efx_find_rss_context_entry(spec->rss_context, - &efx->rss_context.list); + ctx = efx_find_rss_context_entry(efx, spec->rss_context); else ctx = &efx->rss_context; - if (!ctx) - return -ENOENT; - if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) - return -EOPNOTSUPP; + if (!ctx) { + rc = -ENOENT; + goto out_unlock; + } + if (ctx->context_id == EFX_EF10_RSS_CONTEXT_INVALID) { + rc = -EOPNOTSUPP; + goto out_unlock; + } } /* Find any existing filters with the same match tuple or - * else a free slot to insert at. If any of them are busy, - * we have to wait and retry. + * else a free slot to insert at. */ - for (;;) { - unsigned int depth = 1; - unsigned int i; - - spin_lock_bh(&efx->filter_lock); - - for (;;) { - i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1); - saved_spec = efx_ef10_filter_entry_spec(table, i); + for (depth = 1; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) { + i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1); + saved_spec = efx_ef10_filter_entry_spec(table, i); - if (!saved_spec) { - if (ins_index < 0) - ins_index = i; - } else if (efx_ef10_filter_equal(spec, saved_spec)) { - if (table->entry[i].spec & - EFX_EF10_FILTER_FLAG_BUSY) - break; - if (spec->priority < saved_spec->priority && - spec->priority != EFX_FILTER_PRI_AUTO) { - rc = -EPERM; - goto out_unlock; - } - if (!is_mc_recip) { - /* This is the only one */ - if (spec->priority == - saved_spec->priority && - !replace_equal) { - rc = -EEXIST; - goto out_unlock; - } - ins_index = i; - goto found; - } else if (spec->priority > - saved_spec->priority || - (spec->priority == - saved_spec->priority && - replace_equal)) { - if (ins_index < 0) - ins_index = i; - else - __set_bit(depth, mc_rem_map); - } + if (!saved_spec) { + if (ins_index < 0) + ins_index = i; + } else if (efx_ef10_filter_equal(spec, saved_spec)) { + if (spec->priority < saved_spec->priority && + spec->priority != EFX_FILTER_PRI_AUTO) { + rc = -EPERM; + goto out_unlock; } - - /* Once we reach the maximum search depth, use - * the first suitable slot or return -EBUSY if - * there was none - */ - if (depth == EFX_EF10_FILTER_SEARCH_LIMIT) { - if (ins_index < 0) { - rc = -EBUSY; + if (!is_mc_recip) { + /* This is the only one */ + if (spec->priority == + saved_spec->priority && + !replace_equal) { + rc = -EEXIST; goto out_unlock; } - goto found; + ins_index = i; + break; + } else if (spec->priority > + saved_spec->priority || + (spec->priority == + saved_spec->priority && + replace_equal)) { + if (ins_index < 0) + ins_index = i; + else + __set_bit(depth, mc_rem_map); } - - ++depth; } - - prepare_to_wait(&table->waitq, &wait, TASK_UNINTERRUPTIBLE); - spin_unlock_bh(&efx->filter_lock); - schedule(); } -found: - /* Create a software table entry if necessary, and mark it - * busy. We might yet fail to insert, but any attempt to - * insert a conflicting filter while we're waiting for the - * firmware must find the busy entry. + /* Once we reach the maximum search depth, use the first suitable + * slot, or return -EBUSY if there was none */ + if (ins_index < 0) { + rc = -EBUSY; + goto out_unlock; + } + + /* Create a software table entry if necessary. */ saved_spec = efx_ef10_filter_entry_spec(table, ins_index); if (saved_spec) { if (spec->priority == EFX_FILTER_PRI_AUTO && @@ -4438,28 +4441,19 @@ found: *saved_spec = *spec; priv_flags = 0; } - efx_ef10_filter_set_entry(table, ins_index, saved_spec, - priv_flags | EFX_EF10_FILTER_FLAG_BUSY); - - /* Mark lower-priority multicast recipients busy prior to removal */ - if (is_mc_recip) { - unsigned int depth, i; - - for (depth = 0; depth < EFX_EF10_FILTER_SEARCH_LIMIT; depth++) { - i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1); - if (test_bit(depth, mc_rem_map)) - table->entry[i].spec |= - EFX_EF10_FILTER_FLAG_BUSY; - } - } - - spin_unlock_bh(&efx->filter_lock); + efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags); + /* Actually insert the filter on the HW */ rc = efx_ef10_filter_push(efx, spec, &table->entry[ins_index].handle, ctx, replacing); + if (rc == -EINVAL && nic_data->must_realloc_vis) + /* The MC rebooted under us, causing it to reject our filter + * insertion as pointing to an invalid VI (spec->dmaq_id). + */ + rc = -EAGAIN; + /* Finalise the software table entry */ - spin_lock_bh(&efx->filter_lock); if (rc == 0) { if (replacing) { /* Update the fields that may differ */ @@ -4475,6 +4469,12 @@ found: } else if (!replacing) { kfree(saved_spec); saved_spec = NULL; + } else { + /* We failed to replace, so the old filter is still present. + * Roll back the software table to reflect this. In fact the + * efx_ef10_filter_set_entry() call below will do the right + * thing, so nothing extra is needed here. + */ } efx_ef10_filter_set_entry(table, ins_index, saved_spec, priv_flags); @@ -4496,7 +4496,6 @@ found: priv_flags = efx_ef10_filter_entry_flags(table, i); if (rc == 0) { - spin_unlock_bh(&efx->filter_lock); MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, MC_CMD_FILTER_OP_IN_OP_UNSUBSCRIBE); MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, @@ -4504,15 +4503,12 @@ found: rc = efx_mcdi_rpc(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), NULL, 0, NULL); - spin_lock_bh(&efx->filter_lock); } if (rc == 0) { kfree(saved_spec); saved_spec = NULL; priv_flags = 0; - } else { - priv_flags &= ~EFX_EF10_FILTER_FLAG_BUSY; } efx_ef10_filter_set_entry(table, i, saved_spec, priv_flags); @@ -4523,10 +4519,11 @@ found: if (rc == 0) rc = efx_ef10_make_filter_id(match_pri, ins_index); - wake_up_all(&table->waitq); out_unlock: - spin_unlock_bh(&efx->filter_lock); - finish_wait(&table->waitq, &wait); + if (rss_locked) + mutex_unlock(&efx->rss_lock); + up_write(&table->lock); + up_read(&efx->filter_sem); return rc; } @@ -4539,6 +4536,8 @@ static void efx_ef10_filter_update_rx_scatter(struct efx_nic *efx) * If !by_index, remove by ID * If by_index, remove by index * Filter ID may come from userland and must be range-checked. + * Caller must hold efx->filter_sem for read, and efx->filter_state->lock + * for write. */ static int efx_ef10_filter_remove_internal(struct efx_nic *efx, unsigned int priority_mask, @@ -4553,45 +4552,23 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx, DEFINE_WAIT(wait); int rc; - /* Find the software table entry and mark it busy. Don't - * remove it yet; any attempt to update while we're waiting - * for the firmware must find the busy entry. - */ - for (;;) { - spin_lock_bh(&efx->filter_lock); - if (!(table->entry[filter_idx].spec & - EFX_EF10_FILTER_FLAG_BUSY)) - break; - prepare_to_wait(&table->waitq, &wait, TASK_UNINTERRUPTIBLE); - spin_unlock_bh(&efx->filter_lock); - schedule(); - } - spec = efx_ef10_filter_entry_spec(table, filter_idx); if (!spec || (!by_index && efx_ef10_filter_pri(table, spec) != - efx_ef10_filter_get_unsafe_pri(filter_id))) { - rc = -ENOENT; - goto out_unlock; - } + efx_ef10_filter_get_unsafe_pri(filter_id))) + return -ENOENT; if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO && priority_mask == (1U << EFX_FILTER_PRI_AUTO)) { /* Just remove flags */ spec->flags &= ~EFX_FILTER_FLAG_RX_OVER_AUTO; table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_AUTO_OLD; - rc = 0; - goto out_unlock; - } - - if (!(priority_mask & (1U << spec->priority))) { - rc = -ENOENT; - goto out_unlock; + return 0; } - table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY; - spin_unlock_bh(&efx->filter_lock); + if (!(priority_mask & (1U << spec->priority))) + return -ENOENT; if (spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO) { /* Reset to an automatic filter */ @@ -4609,7 +4586,6 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx, &efx->rss_context, true); - spin_lock_bh(&efx->filter_lock); if (rc == 0) *spec = new_spec; } else { @@ -4624,7 +4600,6 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx, rc = efx_mcdi_rpc_quiet(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), NULL, 0, NULL); - spin_lock_bh(&efx->filter_lock); if ((rc == 0) || (rc == -ENOENT)) { /* Filter removed OK or didn't actually exist */ kfree(spec); @@ -4636,11 +4611,6 @@ static int efx_ef10_filter_remove_internal(struct efx_nic *efx, } } - table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_BUSY; - wake_up_all(&table->waitq); -out_unlock: - spin_unlock_bh(&efx->filter_lock); - finish_wait(&table->waitq, &wait); return rc; } @@ -4648,17 +4618,33 @@ static int efx_ef10_filter_remove_safe(struct efx_nic *efx, enum efx_filter_priority priority, u32 filter_id) { - return efx_ef10_filter_remove_internal(efx, 1U << priority, - filter_id, false); + struct efx_ef10_filter_table *table; + int rc; + + down_read(&efx->filter_sem); + table = efx->filter_state; + down_write(&table->lock); + rc = efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id, + false); + up_write(&table->lock); + up_read(&efx->filter_sem); + return rc; } +/* Caller must hold efx->filter_sem for read */ static void efx_ef10_filter_remove_unsafe(struct efx_nic *efx, enum efx_filter_priority priority, u32 filter_id) { + struct efx_ef10_filter_table *table = efx->filter_state; + if (filter_id == EFX_EF10_FILTER_ID_INVALID) return; - efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id, true); + + down_write(&table->lock); + efx_ef10_filter_remove_internal(efx, 1U << priority, filter_id, + true); + up_write(&table->lock); } static int efx_ef10_filter_get_safe(struct efx_nic *efx, @@ -4666,11 +4652,13 @@ static int efx_ef10_filter_get_safe(struct efx_nic *efx, u32 filter_id, struct efx_filter_spec *spec) { unsigned int filter_idx = efx_ef10_filter_get_unsafe_id(filter_id); - struct efx_ef10_filter_table *table = efx->filter_state; const struct efx_filter_spec *saved_spec; + struct efx_ef10_filter_table *table; int rc; - spin_lock_bh(&efx->filter_lock); + down_read(&efx->filter_sem); + table = efx->filter_state; + down_read(&table->lock); saved_spec = efx_ef10_filter_entry_spec(table, filter_idx); if (saved_spec && saved_spec->priority == priority && efx_ef10_filter_pri(table, saved_spec) == @@ -4680,13 +4668,15 @@ static int efx_ef10_filter_get_safe(struct efx_nic *efx, } else { rc = -ENOENT; } - spin_unlock_bh(&efx->filter_lock); + up_read(&table->lock); + up_read(&efx->filter_sem); return rc; } static int efx_ef10_filter_clear_rx(struct efx_nic *efx, - enum efx_filter_priority priority) + enum efx_filter_priority priority) { + struct efx_ef10_filter_table *table; unsigned int priority_mask; unsigned int i; int rc; @@ -4694,31 +4684,40 @@ static int efx_ef10_filter_clear_rx(struct efx_nic *efx, priority_mask = (((1U << (priority + 1)) - 1) & ~(1U << EFX_FILTER_PRI_AUTO)); + down_read(&efx->filter_sem); + table = efx->filter_state; + down_write(&table->lock); for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) { rc = efx_ef10_filter_remove_internal(efx, priority_mask, i, true); if (rc && rc != -ENOENT) - return rc; + break; + rc = 0; } - return 0; + up_write(&table->lock); + up_read(&efx->filter_sem); + return rc; } static u32 efx_ef10_filter_count_rx_used(struct efx_nic *efx, enum efx_filter_priority priority) { - struct efx_ef10_filter_table *table = efx->filter_state; + struct efx_ef10_filter_table *table; unsigned int filter_idx; s32 count = 0; - spin_lock_bh(&efx->filter_lock); + down_read(&efx->filter_sem); + table = efx->filter_state; + down_read(&table->lock); for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { if (table->entry[filter_idx].spec && efx_ef10_filter_entry_spec(table, filter_idx)->priority == priority) ++count; } - spin_unlock_bh(&efx->filter_lock); + up_read(&table->lock); + up_read(&efx->filter_sem); return count; } @@ -4733,12 +4732,15 @@ static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx, enum efx_filter_priority priority, u32 *buf, u32 size) { - struct efx_ef10_filter_table *table = efx->filter_state; + struct efx_ef10_filter_table *table; struct efx_filter_spec *spec; unsigned int filter_idx; s32 count = 0; - spin_lock_bh(&efx->filter_lock); + down_read(&efx->filter_sem); + table = efx->filter_state; + down_read(&table->lock); + for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { spec = efx_ef10_filter_entry_spec(table, filter_idx); if (spec && spec->priority == priority) { @@ -4752,202 +4754,42 @@ static s32 efx_ef10_filter_get_rx_ids(struct efx_nic *efx, filter_idx); } } - spin_unlock_bh(&efx->filter_lock); + up_read(&table->lock); + up_read(&efx->filter_sem); return count; } #ifdef CONFIG_RFS_ACCEL -static efx_mcdi_async_completer efx_ef10_filter_rfs_insert_complete; - -static s32 efx_ef10_filter_rfs_insert(struct efx_nic *efx, - struct efx_filter_spec *spec) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); - struct efx_filter_spec *saved_spec; - unsigned int hash, i, depth = 1; - bool replacing = false; - int ins_index = -1; - u64 cookie; - s32 rc; - - /* Must be an RX filter without RSS and not for a multicast - * destination address (RFS only works for connected sockets). - * These restrictions allow us to pass only a tiny amount of - * data through to the completion function. - */ - EFX_WARN_ON_PARANOID(spec->flags != - (EFX_FILTER_FLAG_RX | EFX_FILTER_FLAG_RX_SCATTER)); - EFX_WARN_ON_PARANOID(spec->priority != EFX_FILTER_PRI_HINT); - EFX_WARN_ON_PARANOID(efx_filter_is_mc_recipient(spec)); - - hash = efx_ef10_filter_hash(spec); - - spin_lock_bh(&efx->filter_lock); - - /* Find any existing filter with the same match tuple or else - * a free slot to insert at. If an existing filter is busy, - * we have to give up. - */ - for (;;) { - i = (hash + depth) & (HUNT_FILTER_TBL_ROWS - 1); - saved_spec = efx_ef10_filter_entry_spec(table, i); - - if (!saved_spec) { - if (ins_index < 0) - ins_index = i; - } else if (efx_ef10_filter_equal(spec, saved_spec)) { - if (table->entry[i].spec & EFX_EF10_FILTER_FLAG_BUSY) { - rc = -EBUSY; - goto fail_unlock; - } - if (spec->priority < saved_spec->priority) { - rc = -EPERM; - goto fail_unlock; - } - ins_index = i; - break; - } - - /* Once we reach the maximum search depth, use the - * first suitable slot or return -EBUSY if there was - * none - */ - if (depth == EFX_EF10_FILTER_SEARCH_LIMIT) { - if (ins_index < 0) { - rc = -EBUSY; - goto fail_unlock; - } - break; - } - - ++depth; - } - - /* Create a software table entry if necessary, and mark it - * busy. We might yet fail to insert, but any attempt to - * insert a conflicting filter while we're waiting for the - * firmware must find the busy entry. - */ - saved_spec = efx_ef10_filter_entry_spec(table, ins_index); - if (saved_spec) { - replacing = true; - } else { - saved_spec = kmalloc(sizeof(*spec), GFP_ATOMIC); - if (!saved_spec) { - rc = -ENOMEM; - goto fail_unlock; - } - *saved_spec = *spec; - } - efx_ef10_filter_set_entry(table, ins_index, saved_spec, - EFX_EF10_FILTER_FLAG_BUSY); - - spin_unlock_bh(&efx->filter_lock); - - /* Pack up the variables needed on completion */ - cookie = replacing << 31 | ins_index << 16 | spec->dmaq_id; - - efx_ef10_filter_push_prep(efx, spec, inbuf, - table->entry[ins_index].handle, NULL, - replacing); - efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), - MC_CMD_FILTER_OP_OUT_LEN, - efx_ef10_filter_rfs_insert_complete, cookie); - - return ins_index; - -fail_unlock: - spin_unlock_bh(&efx->filter_lock); - return rc; -} - -static void -efx_ef10_filter_rfs_insert_complete(struct efx_nic *efx, unsigned long cookie, - int rc, efx_dword_t *outbuf, - size_t outlen_actual) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - unsigned int ins_index, dmaq_id; - struct efx_filter_spec *spec; - bool replacing; - - /* Unpack the cookie */ - replacing = cookie >> 31; - ins_index = (cookie >> 16) & (HUNT_FILTER_TBL_ROWS - 1); - dmaq_id = cookie & 0xffff; - - spin_lock_bh(&efx->filter_lock); - spec = efx_ef10_filter_entry_spec(table, ins_index); - if (rc == 0) { - table->entry[ins_index].handle = - MCDI_QWORD(outbuf, FILTER_OP_OUT_HANDLE); - if (replacing) - spec->dmaq_id = dmaq_id; - } else if (!replacing) { - kfree(spec); - spec = NULL; - } - efx_ef10_filter_set_entry(table, ins_index, spec, 0); - spin_unlock_bh(&efx->filter_lock); - - wake_up_all(&table->waitq); -} - -static void -efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx, - unsigned long filter_idx, - int rc, efx_dword_t *outbuf, - size_t outlen_actual); - static bool efx_ef10_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, unsigned int filter_idx) { - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_filter_spec *spec = - efx_ef10_filter_entry_spec(table, filter_idx); - MCDI_DECLARE_BUF(inbuf, - MC_CMD_FILTER_OP_IN_HANDLE_OFST + - MC_CMD_FILTER_OP_IN_HANDLE_LEN); - - if (!spec || - (table->entry[filter_idx].spec & EFX_EF10_FILTER_FLAG_BUSY) || - spec->priority != EFX_FILTER_PRI_HINT || - !rps_may_expire_flow(efx->net_dev, spec->dmaq_id, - flow_id, filter_idx)) - return false; - - MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, - MC_CMD_FILTER_OP_IN_OP_REMOVE); - MCDI_SET_QWORD(inbuf, FILTER_OP_IN_HANDLE, - table->entry[filter_idx].handle); - if (efx_mcdi_rpc_async(efx, MC_CMD_FILTER_OP, inbuf, sizeof(inbuf), 0, - efx_ef10_filter_rfs_expire_complete, filter_idx)) - return false; + struct efx_ef10_filter_table *table; + struct efx_filter_spec *spec; + bool ret; - table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY; - return true; -} + down_read(&efx->filter_sem); + table = efx->filter_state; + down_write(&table->lock); + spec = efx_ef10_filter_entry_spec(table, filter_idx); -static void -efx_ef10_filter_rfs_expire_complete(struct efx_nic *efx, - unsigned long filter_idx, - int rc, efx_dword_t *outbuf, - size_t outlen_actual) -{ - struct efx_ef10_filter_table *table = efx->filter_state; - struct efx_filter_spec *spec = - efx_ef10_filter_entry_spec(table, filter_idx); + if (!spec || spec->priority != EFX_FILTER_PRI_HINT) { + ret = true; + goto out_unlock; + } - spin_lock_bh(&efx->filter_lock); - if (rc == 0) { - kfree(spec); - efx_ef10_filter_set_entry(table, filter_idx, NULL, 0); + if (!rps_may_expire_flow(efx->net_dev, spec->dmaq_id, + flow_id, filter_idx)) { + ret = false; + goto out_unlock; } - table->entry[filter_idx].spec &= ~EFX_EF10_FILTER_FLAG_BUSY; - wake_up_all(&table->waitq); - spin_unlock_bh(&efx->filter_lock); + + ret = efx_ef10_filter_remove_internal(efx, 1U << spec->priority, + filter_idx, true) == 0; +out_unlock: + up_write(&table->lock); + up_read(&efx->filter_sem); + return ret; } #endif /* CONFIG_RFS_ACCEL */ @@ -5142,9 +4984,9 @@ static int efx_ef10_filter_table_probe(struct efx_nic *efx) table->vlan_filter = !!(efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_FILTER); INIT_LIST_HEAD(&table->vlan_list); + init_rwsem(&table->lock); efx->filter_state = table; - init_waitqueue_head(&table->waitq); list_for_each_entry(vlan, &nic_data->vlan_list, list) { rc = efx_ef10_filter_add_vlan(efx, vlan->vid); @@ -5186,7 +5028,8 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx) if (!table) return; - spin_lock_bh(&efx->filter_lock); + down_write(&table->lock); + mutex_lock(&efx->rss_lock); for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { spec = efx_ef10_filter_entry_spec(table, filter_idx); @@ -5203,8 +5046,7 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx) goto not_restored; } if (spec->rss_context) - ctx = efx_find_rss_context_entry(spec->rss_context, - &efx->rss_context.list); + ctx = efx_find_rss_context_entry(efx, spec->rss_context); else ctx = &efx->rss_context; if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { @@ -5224,15 +5066,11 @@ static void efx_ef10_filter_table_restore(struct efx_nic *efx) } } - table->entry[filter_idx].spec |= EFX_EF10_FILTER_FLAG_BUSY; - spin_unlock_bh(&efx->filter_lock); - rc = efx_ef10_filter_push(efx, spec, &table->entry[filter_idx].handle, ctx, false); if (rc) failed++; - spin_lock_bh(&efx->filter_lock); if (rc) { not_restored: @@ -5244,13 +5082,11 @@ not_restored: kfree(spec); efx_ef10_filter_set_entry(table, filter_idx, NULL, 0); - } else { - table->entry[filter_idx].spec &= - ~EFX_EF10_FILTER_FLAG_BUSY; } } - spin_unlock_bh(&efx->filter_lock); + mutex_unlock(&efx->rss_lock); + up_write(&table->lock); /* This can happen validly if the MC's capabilities have changed, so * is not an error. @@ -5318,6 +5154,8 @@ static void efx_ef10_filter_mark_one_old(struct efx_nic *efx, uint16_t *id) struct efx_ef10_filter_table *table = efx->filter_state; unsigned int filter_idx; + efx_rwsem_assert_write_locked(&table->lock); + if (*id != EFX_EF10_FILTER_ID_INVALID) { filter_idx = efx_ef10_filter_get_unsafe_id(*id); if (!table->entry[filter_idx].spec) @@ -5353,10 +5191,10 @@ static void efx_ef10_filter_mark_old(struct efx_nic *efx) struct efx_ef10_filter_table *table = efx->filter_state; struct efx_ef10_filter_vlan *vlan; - spin_lock_bh(&efx->filter_lock); + down_write(&table->lock); list_for_each_entry(vlan, &table->vlan_list, list) _efx_ef10_filter_vlan_mark_old(efx, vlan); - spin_unlock_bh(&efx->filter_lock); + up_write(&table->lock); } static void efx_ef10_filter_uc_addr_list(struct efx_nic *efx) @@ -5633,10 +5471,7 @@ static int efx_ef10_filter_insert_def(struct efx_nic *efx, return rc; } -/* Remove filters that weren't renewed. Since nothing else changes the AUTO_OLD - * flag or removes these filters, we don't need to hold the filter_lock while - * scanning for these filters. - */ +/* Remove filters that weren't renewed. */ static void efx_ef10_filter_remove_old(struct efx_nic *efx) { struct efx_ef10_filter_table *table = efx->filter_state; @@ -5645,6 +5480,7 @@ static void efx_ef10_filter_remove_old(struct efx_nic *efx) int rc; int i; + down_write(&table->lock); for (i = 0; i < HUNT_FILTER_TBL_ROWS; i++) { if (READ_ONCE(table->entry[i].spec) & EFX_EF10_FILTER_FLAG_AUTO_OLD) { @@ -5656,6 +5492,7 @@ static void efx_ef10_filter_remove_old(struct efx_nic *efx) remove_failed++; } } + up_write(&table->lock); if (remove_failed) netif_info(efx, drv, efx->net_dev, @@ -6784,7 +6621,6 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = { .filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit, .filter_get_rx_ids = efx_ef10_filter_get_rx_ids, #ifdef CONFIG_RFS_ACCEL - .filter_rfs_insert = efx_ef10_filter_rfs_insert, .filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one, #endif #ifdef CONFIG_SFC_MTD @@ -6897,7 +6733,6 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .filter_get_rx_id_limit = efx_ef10_filter_get_rx_id_limit, .filter_get_rx_ids = efx_ef10_filter_get_rx_ids, #ifdef CONFIG_RFS_ACCEL - .filter_rfs_insert = efx_ef10_filter_rfs_insert, .filter_rfs_expire_one = efx_ef10_filter_rfs_expire_one, #endif #ifdef CONFIG_SFC_MTD diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 7321a4cf6f4d..692dd729ee2a 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -340,7 +340,10 @@ static int efx_poll(struct napi_struct *napi, int budget) efx_update_irq_mod(efx, channel); } - efx_filter_rfs_expire(channel); +#ifdef CONFIG_RFS_ACCEL + /* Perhaps expire some ARFS filters */ + schedule_work(&channel->filter_work); +#endif /* There is no race here; although napi_disable() will * only wait for napi_complete(), this isn't a problem @@ -470,6 +473,10 @@ efx_alloc_channel(struct efx_nic *efx, int i, struct efx_channel *old_channel) tx_queue->channel = channel; } +#ifdef CONFIG_RFS_ACCEL + INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); +#endif + rx_queue = &channel->rx_queue; rx_queue->efx = efx; timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); @@ -512,6 +519,9 @@ efx_copy_channel(const struct efx_channel *old_channel) rx_queue->buffer = NULL; memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd)); timer_setup(&rx_queue->slow_fill, efx_rx_slow_fill, 0); +#ifdef CONFIG_RFS_ACCEL + INIT_WORK(&channel->filter_work, efx_filter_rfs_expire); +#endif return channel; } @@ -1773,7 +1783,6 @@ static int efx_probe_filters(struct efx_nic *efx) { int rc; - spin_lock_init(&efx->filter_lock); init_rwsem(&efx->filter_sem); mutex_lock(&efx->mac_lock); down_write(&efx->filter_sem); @@ -2648,6 +2657,7 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) efx_disable_interrupts(efx); mutex_lock(&efx->mac_lock); + mutex_lock(&efx->rss_lock); if (efx->port_initialized && method != RESET_TYPE_INVISIBLE && method != RESET_TYPE_DATAPATH) efx->phy_op->fini(efx); @@ -2703,6 +2713,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) if (efx->type->rx_restore_rss_contexts) efx->type->rx_restore_rss_contexts(efx); + mutex_unlock(&efx->rss_lock); down_read(&efx->filter_sem); efx_restore_filters(efx); up_read(&efx->filter_sem); @@ -2721,6 +2732,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) fail: efx->port_initialized = false; + mutex_unlock(&efx->rss_lock); mutex_unlock(&efx->mac_lock); return rc; @@ -3007,11 +3019,15 @@ static int efx_init_struct(struct efx_nic *efx, efx->rx_packet_ts_offset = efx->type->rx_ts_offset - efx->type->rx_prefix_size; INIT_LIST_HEAD(&efx->rss_context.list); + mutex_init(&efx->rss_lock); spin_lock_init(&efx->stats_lock); efx->vi_stride = EFX_DEFAULT_VI_STRIDE; efx->num_mac_stats = MC_CMD_MAC_NSTATS; BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); mutex_init(&efx->mac_lock); +#ifdef CONFIG_RFS_ACCEL + mutex_init(&efx->rps_mutex); +#endif efx->phy_op = &efx_dummy_phy_operations; efx->mdio.dev = net_dev; INIT_WORK(&efx->mac_work, efx_mac_work); @@ -3079,11 +3095,14 @@ void efx_update_sw_stats(struct efx_nic *efx, u64 *stats) /* RSS contexts. We're using linked lists and crappy O(n) algorithms, because * (a) this is an infrequent control-plane operation and (b) n is small (max 64) */ -struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *head) +struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) { + struct list_head *head = &efx->rss_context.list; struct efx_rss_context *ctx, *new; u32 id = 1; /* Don't use zero, that refers to the master RSS context */ + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + /* Search for first gap in the numbering */ list_for_each_entry(ctx, head, list) { if (ctx->user_id != id) @@ -3109,10 +3128,13 @@ struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *head) return new; } -struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *head) +struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) { + struct list_head *head = &efx->rss_context.list; struct efx_rss_context *ctx; + WARN_ON(!mutex_is_locked(&efx->rss_lock)); + list_for_each_entry(ctx, head, list) if (ctx->user_id == id) return ctx; diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 3429ae3f3b08..a3140e16fcef 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -170,22 +170,25 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx, int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id); bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned quota); -static inline void efx_filter_rfs_expire(struct efx_channel *channel) +static inline void efx_filter_rfs_expire(struct work_struct *data) { + struct efx_channel *channel = container_of(data, struct efx_channel, + filter_work); + if (channel->rfs_filters_added >= 60 && __efx_filter_rfs_expire(channel->efx, 100)) channel->rfs_filters_added -= 60; } #define efx_filter_rfs_enabled() 1 #else -static inline void efx_filter_rfs_expire(struct efx_channel *channel) {} +static inline void efx_filter_rfs_expire(struct work_struct *data) {} #define efx_filter_rfs_enabled() 0 #endif bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); /* RSS contexts */ -struct efx_rss_context *efx_alloc_rss_context_entry(struct list_head *list); -struct efx_rss_context *efx_find_rss_context_entry(u32 id, struct list_head *list); +struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); +struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); void efx_free_rss_context_entry(struct efx_rss_context *ctx); static inline bool efx_rss_active(struct efx_rss_context *ctx) { diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index bb1c80d48d12..3143588ffd77 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -979,7 +979,7 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev, { struct efx_nic *efx = netdev_priv(net_dev); u32 rss_context = 0; - s32 rc; + s32 rc = 0; switch (info->cmd) { case ETHTOOL_GRXRINGS: @@ -989,15 +989,17 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev, case ETHTOOL_GRXFH: { struct efx_rss_context *ctx = &efx->rss_context; + mutex_lock(&efx->rss_lock); if (info->flow_type & FLOW_RSS && info->rss_context) { - ctx = efx_find_rss_context_entry(info->rss_context, - &efx->rss_context.list); - if (!ctx) - return -ENOENT; + ctx = efx_find_rss_context_entry(efx, info->rss_context); + if (!ctx) { + rc = -ENOENT; + goto out_unlock; + } } info->data = 0; if (!efx_rss_active(ctx)) /* No RSS */ - return 0; + goto out_unlock; switch (info->flow_type & ~FLOW_RSS) { case UDP_V4_FLOW: if (ctx->rx_hash_udp_4tuple) @@ -1024,7 +1026,9 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev, default: break; } - return 0; +out_unlock: + mutex_unlock(&efx->rss_lock); + return rc; } case ETHTOOL_GRXCLSRLCNT: @@ -1084,6 +1088,7 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx, struct ethtool_tcpip6_spec *ip6_mask = &rule->m_u.tcp_ip6_spec; struct ethtool_usrip6_spec *uip6_entry = &rule->h_u.usr_ip6_spec; struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec; + u32 flow_type = rule->flow_type & ~(FLOW_EXT | FLOW_RSS); struct ethhdr *mac_entry = &rule->h_u.ether_spec; struct ethhdr *mac_mask = &rule->m_u.ether_spec; enum efx_filter_flags flags = 0; @@ -1117,14 +1122,14 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx, if (rule->flow_type & FLOW_RSS) spec.rss_context = rss_context; - switch (rule->flow_type & ~(FLOW_EXT | FLOW_RSS)) { + switch (flow_type) { case TCP_V4_FLOW: case UDP_V4_FLOW: spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO); spec.ether_type = htons(ETH_P_IP); - spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V4_FLOW ? - IPPROTO_TCP : IPPROTO_UDP); + spec.ip_proto = flow_type == TCP_V4_FLOW ? IPPROTO_TCP + : IPPROTO_UDP; if (ip_mask->ip4dst) { if (ip_mask->ip4dst != IP4_ADDR_FULL_MASK) return -EINVAL; @@ -1158,8 +1163,8 @@ static int efx_ethtool_set_class_rule(struct efx_nic *efx, spec.match_flags = (EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO); spec.ether_type = htons(ETH_P_IPV6); - spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V6_FLOW ? - IPPROTO_TCP : IPPROTO_UDP); + spec.ip_proto = flow_type == TCP_V6_FLOW ? IPPROTO_TCP + : IPPROTO_UDP; if (!ip6_mask_is_empty(ip6_mask->ip6dst)) { if (!ip6_mask_is_full(ip6_mask->ip6dst)) return -EINVAL; @@ -1366,16 +1371,20 @@ static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir, { struct efx_nic *efx = netdev_priv(net_dev); struct efx_rss_context *ctx; - int rc; + int rc = 0; if (!efx->type->rx_pull_rss_context_config) return -EOPNOTSUPP; - ctx = efx_find_rss_context_entry(rss_context, &efx->rss_context.list); - if (!ctx) - return -ENOENT; + + mutex_lock(&efx->rss_lock); + ctx = efx_find_rss_context_entry(efx, rss_context); + if (!ctx) { + rc = -ENOENT; + goto out_unlock; + } rc = efx->type->rx_pull_rss_context_config(efx, ctx); if (rc) - return rc; + goto out_unlock; if (hfunc) *hfunc = ETH_RSS_HASH_TOP; @@ -1383,7 +1392,9 @@ static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, u32 *indir, memcpy(indir, ctx->rx_indir_table, sizeof(ctx->rx_indir_table)); if (key) memcpy(key, ctx->rx_hash_key, efx->type->rx_hash_key_size); - return 0; +out_unlock: + mutex_unlock(&efx->rss_lock); + return rc; } static int efx_ethtool_set_rxfh_context(struct net_device *net_dev, @@ -1401,23 +1412,31 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev, /* Hash function is Toeplitz, cannot be changed */ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; + + mutex_lock(&efx->rss_lock); + if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { - if (delete) + if (delete) { /* alloc + delete == Nothing to do */ - return -EINVAL; - ctx = efx_alloc_rss_context_entry(&efx->rss_context.list); - if (!ctx) - return -ENOMEM; + rc = -EINVAL; + goto out_unlock; + } + ctx = efx_alloc_rss_context_entry(efx); + if (!ctx) { + rc = -ENOMEM; + goto out_unlock; + } ctx->context_id = EFX_EF10_RSS_CONTEXT_INVALID; /* Initialise indir table and key to defaults */ efx_set_default_rx_indir_table(efx, ctx); netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key)); allocated = true; } else { - ctx = efx_find_rss_context_entry(*rss_context, - &efx->rss_context.list); - if (!ctx) - return -ENOENT; + ctx = efx_find_rss_context_entry(efx, *rss_context); + if (!ctx) { + rc = -ENOENT; + goto out_unlock; + } } if (delete) { @@ -1425,7 +1444,7 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev, rc = efx->type->rx_push_rss_context_config(efx, ctx, NULL, NULL); if (!rc) efx_free_rss_context_entry(ctx); - return rc; + goto out_unlock; } if (!key) @@ -1438,6 +1457,8 @@ static int efx_ethtool_set_rxfh_context(struct net_device *net_dev, efx_free_rss_context_entry(ctx); else *rss_context = ctx->user_id; +out_unlock: + mutex_unlock(&efx->rss_lock); return rc; } diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index ad001e77d554..4a19c7efdf8d 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -1878,6 +1878,7 @@ struct efx_farch_filter_table { }; struct efx_farch_filter_state { + struct rw_semaphore lock; /* Protects table contents */ struct efx_farch_filter_table table[EFX_FARCH_FILTER_TABLE_COUNT]; }; @@ -2397,9 +2398,13 @@ s32 efx_farch_filter_insert(struct efx_nic *efx, if (rc) return rc; + down_write(&state->lock); + table = &state->table[efx_farch_filter_spec_table_id(&spec)]; - if (table->size == 0) - return -EINVAL; + if (table->size == 0) { + rc = -EINVAL; + goto out_unlock; + } netif_vdbg(efx, hw, efx->net_dev, "%s: type %d search_limit=%d", __func__, spec.type, @@ -2412,8 +2417,6 @@ s32 efx_farch_filter_insert(struct efx_nic *efx, EFX_FARCH_FILTER_MC_DEF - EFX_FARCH_FILTER_UC_DEF); rep_index = spec.type - EFX_FARCH_FILTER_UC_DEF; ins_index = rep_index; - - spin_lock_bh(&efx->filter_lock); } else { /* Search concurrently for * (1) a filter to be replaced (rep_index): any filter @@ -2443,8 +2446,6 @@ s32 efx_farch_filter_insert(struct efx_nic *efx, ins_index = -1; depth = 1; - spin_lock_bh(&efx->filter_lock); - for (;;) { if (!test_bit(i, table->used_bitmap)) { if (ins_index < 0) @@ -2463,7 +2464,7 @@ s32 efx_farch_filter_insert(struct efx_nic *efx, /* Case (b) */ if (ins_index < 0) { rc = -EBUSY; - goto out; + goto out_unlock; } rep_index = -1; break; @@ -2483,11 +2484,11 @@ s32 efx_farch_filter_insert(struct efx_nic *efx, if (spec.priority == saved_spec->priority && !replace_equal) { rc = -EEXIST; - goto out; + goto out_unlock; } if (spec.priority < saved_spec->priority) { rc = -EPERM; - goto out; + goto out_unlock; } if (saved_spec->priority == EFX_FILTER_PRI_AUTO || saved_spec->flags & EFX_FILTER_FLAG_RX_OVER_AUTO) @@ -2528,8 +2529,8 @@ s32 efx_farch_filter_insert(struct efx_nic *efx, __func__, spec.type, ins_index, spec.dmaq_id); rc = efx_farch_filter_make_id(&spec, ins_index); -out: - spin_unlock_bh(&efx->filter_lock); +out_unlock: + up_write(&state->lock); return rc; } @@ -2604,11 +2605,11 @@ int efx_farch_filter_remove_safe(struct efx_nic *efx, filter_idx = efx_farch_filter_id_index(filter_id); if (filter_idx >= table->size) return -ENOENT; + down_write(&state->lock); spec = &table->spec[filter_idx]; - spin_lock_bh(&efx->filter_lock); rc = efx_farch_filter_remove(efx, table, filter_idx, priority); - spin_unlock_bh(&efx->filter_lock); + up_write(&state->lock); return rc; } @@ -2622,30 +2623,28 @@ int efx_farch_filter_get_safe(struct efx_nic *efx, struct efx_farch_filter_table *table; struct efx_farch_filter_spec *spec; unsigned int filter_idx; - int rc; + int rc = -ENOENT; + + down_read(&state->lock); table_id = efx_farch_filter_id_table_id(filter_id); if ((unsigned int)table_id >= EFX_FARCH_FILTER_TABLE_COUNT) - return -ENOENT; + goto out_unlock; table = &state->table[table_id]; filter_idx = efx_farch_filter_id_index(filter_id); if (filter_idx >= table->size) - return -ENOENT; + goto out_unlock; spec = &table->spec[filter_idx]; - spin_lock_bh(&efx->filter_lock); - if (test_bit(filter_idx, table->used_bitmap) && spec->priority == priority) { efx_farch_filter_to_gen_spec(spec_buf, spec); rc = 0; - } else { - rc = -ENOENT; } - spin_unlock_bh(&efx->filter_lock); - +out_unlock: + up_read(&state->lock); return rc; } @@ -2658,13 +2657,13 @@ efx_farch_filter_table_clear(struct efx_nic *efx, struct efx_farch_filter_table *table = &state->table[table_id]; unsigned int filter_idx; - spin_lock_bh(&efx->filter_lock); + down_write(&state->lock); for (filter_idx = 0; filter_idx < table->size; ++filter_idx) { if (table->spec[filter_idx].priority != EFX_FILTER_PRI_AUTO) efx_farch_filter_remove(efx, table, filter_idx, priority); } - spin_unlock_bh(&efx->filter_lock); + up_write(&state->lock); } int efx_farch_filter_clear_rx(struct efx_nic *efx, @@ -2688,7 +2687,7 @@ u32 efx_farch_filter_count_rx_used(struct efx_nic *efx, unsigned int filter_idx; u32 count = 0; - spin_lock_bh(&efx->filter_lock); + down_read(&state->lock); for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP; table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF; @@ -2701,7 +2700,7 @@ u32 efx_farch_filter_count_rx_used(struct efx_nic *efx, } } - spin_unlock_bh(&efx->filter_lock); + up_read(&state->lock); return count; } @@ -2716,7 +2715,7 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx, unsigned int filter_idx; s32 count = 0; - spin_lock_bh(&efx->filter_lock); + down_read(&state->lock); for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP; table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF; @@ -2735,7 +2734,7 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx, } } out: - spin_unlock_bh(&efx->filter_lock); + up_read(&state->lock); return count; } @@ -2749,7 +2748,7 @@ void efx_farch_filter_table_restore(struct efx_nic *efx) efx_oword_t filter; unsigned int filter_idx; - spin_lock_bh(&efx->filter_lock); + down_write(&state->lock); for (table_id = 0; table_id < EFX_FARCH_FILTER_TABLE_COUNT; table_id++) { table = &state->table[table_id]; @@ -2770,7 +2769,7 @@ void efx_farch_filter_table_restore(struct efx_nic *efx) efx_farch_filter_push_rx_config(efx); efx_farch_filter_push_tx_limits(efx); - spin_unlock_bh(&efx->filter_lock); + up_write(&state->lock); } void efx_farch_filter_table_remove(struct efx_nic *efx) @@ -2864,7 +2863,7 @@ void efx_farch_filter_update_rx_scatter(struct efx_nic *efx) efx_oword_t filter; unsigned int filter_idx; - spin_lock_bh(&efx->filter_lock); + down_write(&state->lock); for (table_id = EFX_FARCH_FILTER_TABLE_RX_IP; table_id <= EFX_FARCH_FILTER_TABLE_RX_DEF; @@ -2896,33 +2895,30 @@ void efx_farch_filter_update_rx_scatter(struct efx_nic *efx) efx_farch_filter_push_rx_config(efx); - spin_unlock_bh(&efx->filter_lock); + up_write(&state->lock); } #ifdef CONFIG_RFS_ACCEL -s32 efx_farch_filter_rfs_insert(struct efx_nic *efx, - struct efx_filter_spec *gen_spec) -{ - return efx_farch_filter_insert(efx, gen_spec, true); -} - bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, unsigned int index) { struct efx_farch_filter_state *state = efx->filter_state; - struct efx_farch_filter_table *table = - &state->table[EFX_FARCH_FILTER_TABLE_RX_IP]; + struct efx_farch_filter_table *table; + bool ret = false; + down_write(&state->lock); + table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP]; if (test_bit(index, table->used_bitmap) && table->spec[index].priority == EFX_FILTER_PRI_HINT && rps_may_expire_flow(efx->net_dev, table->spec[index].dmaq_id, flow_id, index)) { efx_farch_filter_table_clear_entry(efx, table, index); - return true; + ret = true; } - return false; + up_write(&state->lock); + return ret; } #endif /* CONFIG_RFS_ACCEL */ diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 2453f3849e72..5e379a83c729 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -430,6 +430,7 @@ enum efx_sync_events_state { * @event_test_cpu: Last CPU to handle interrupt or test event for this channel * @irq_count: Number of IRQs since last adaptive moderation decision * @irq_mod_score: IRQ moderation score + * @filter_work: Work item for efx_filter_rfs_expire() * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, * indexed by filter ID * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors @@ -475,6 +476,7 @@ struct efx_channel { unsigned int irq_mod_score; #ifdef CONFIG_RFS_ACCEL unsigned int rfs_filters_added; + struct work_struct filter_work; #define RPS_FLOW_ID_INVALID 0xFFFFFFFF u32 *rps_flow_id; #endif @@ -794,6 +796,7 @@ struct efx_rss_context { * @rx_scatter: Scatter mode enabled for receives * @rss_context: Main RSS context. Its @list member is the head of the list of * RSS contexts created by user requests + * @rss_lock: Protects custom RSS context software state in @rss_context.list * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at which error count will be expired * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will @@ -841,9 +844,9 @@ struct efx_rss_context { * @loopback_mode: Loopback status * @loopback_modes: Supported loopback mode bitmask * @loopback_selftest: Offline self-test private state - * @filter_sem: Filter table rw_semaphore, for freeing the table - * @filter_lock: Filter table lock, for mere content changes + * @filter_sem: Filter table rw_semaphore, protects existence of @filter_state * @filter_state: Architecture-dependent filter table state + * @rps_mutex: Protects RPS state of all channels * @rps_expire_channel: Next channel to check for expiry * @rps_expire_index: Next index to check for expiry in * @rps_expire_channel's @rps_flow_id @@ -938,6 +941,7 @@ struct efx_nic { int rx_packet_ts_offset; bool rx_scatter; struct efx_rss_context rss_context; + struct mutex rss_lock; unsigned int_error_count; unsigned long int_error_expire; @@ -995,9 +999,9 @@ struct efx_nic { void *loopback_selftest; struct rw_semaphore filter_sem; - spinlock_t filter_lock; void *filter_state; #ifdef CONFIG_RFS_ACCEL + struct mutex rps_mutex; unsigned int rps_expire_channel; unsigned int rps_expire_index; #endif @@ -1152,10 +1156,6 @@ struct efx_udp_tunnel { * @filter_count_rx_used: Get the number of filters in use at a given priority * @filter_get_rx_id_limit: Get maximum value of a filter id, plus 1 * @filter_get_rx_ids: Get list of RX filters at a given priority - * @filter_rfs_insert: Add or replace a filter for RFS. This must be - * atomic. The hardware change may be asynchronous but should - * not be delayed for long. It may fail if this can't be done - * atomically. * @filter_rfs_expire_one: Consider expiring a filter inserted for RFS. * This must check whether the specified table entry is used by RFS * and that rps_may_expire_flow() returns true for it. @@ -1306,8 +1306,6 @@ struct efx_nic_type { enum efx_filter_priority priority, u32 *buf, u32 size); #ifdef CONFIG_RFS_ACCEL - s32 (*filter_rfs_insert)(struct efx_nic *efx, - struct efx_filter_spec *spec); bool (*filter_rfs_expire_one)(struct efx_nic *efx, u32 flow_id, unsigned int index); #endif diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index d080a414e8f2..5640034bda10 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -365,6 +365,8 @@ enum { * @vi_base: Absolute index of first VI in this function * @n_allocated_vis: Number of VIs allocated to this function * @must_realloc_vis: Flag: VIs have yet to be reallocated after MC reboot + * @must_restore_rss_contexts: Flag: RSS contexts have yet to be restored after + * MC reboot * @must_restore_filters: Flag: filters have yet to be restored after MC reboot * @n_piobufs: Number of PIO buffers allocated to this function * @wc_membase: Base address of write-combining mapping of the memory BAR @@ -407,6 +409,7 @@ struct efx_ef10_nic_data { unsigned int vi_base; unsigned int n_allocated_vis; bool must_realloc_vis; + bool must_restore_rss_contexts; bool must_restore_filters; unsigned int n_piobufs; void __iomem *wc_membase, *pio_write_base; @@ -601,8 +604,6 @@ s32 efx_farch_filter_get_rx_ids(struct efx_nic *efx, enum efx_filter_priority priority, u32 *buf, u32 size); #ifdef CONFIG_RFS_ACCEL -s32 efx_farch_filter_rfs_insert(struct efx_nic *efx, - struct efx_filter_spec *spec); bool efx_farch_filter_rfs_expire_one(struct efx_nic *efx, u32 flow_id, unsigned int index); #endif diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index cfe76aad79ee..95682831484e 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -827,14 +827,67 @@ MODULE_PARM_DESC(rx_refill_threshold, #ifdef CONFIG_RFS_ACCEL +/** + * struct efx_async_filter_insertion - Request to asynchronously insert a filter + * @net_dev: Reference to the netdevice + * @spec: The filter to insert + * @work: Workitem for this request + * @rxq_index: Identifies the channel for which this request was made + * @flow_id: Identifies the kernel-side flow for which this request was made + */ +struct efx_async_filter_insertion { + struct net_device *net_dev; + struct efx_filter_spec spec; + struct work_struct work; + u16 rxq_index; + u32 flow_id; +}; + +static void efx_filter_rfs_work(struct work_struct *data) +{ + struct efx_async_filter_insertion *req = container_of(data, struct efx_async_filter_insertion, + work); + struct efx_nic *efx = netdev_priv(req->net_dev); + struct efx_channel *channel = efx_get_channel(efx, req->rxq_index); + int rc; + + rc = efx->type->filter_insert(efx, &req->spec, false); + if (rc >= 0) { + /* Remember this so we can check whether to expire the filter + * later. + */ + mutex_lock(&efx->rps_mutex); + channel->rps_flow_id[rc] = req->flow_id; + ++channel->rfs_filters_added; + mutex_unlock(&efx->rps_mutex); + + if (req->spec.ether_type == htons(ETH_P_IP)) + netif_info(efx, rx_status, efx->net_dev, + "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n", + (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + req->spec.rem_host, ntohs(req->spec.rem_port), + req->spec.loc_host, ntohs(req->spec.loc_port), + req->rxq_index, req->flow_id, rc); + else + netif_info(efx, rx_status, efx->net_dev, + "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n", + (req->spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", + req->spec.rem_host, ntohs(req->spec.rem_port), + req->spec.loc_host, ntohs(req->spec.loc_port), + req->rxq_index, req->flow_id, rc); + } + + /* Release references */ + dev_put(req->net_dev); + kfree(req); +} + int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, u16 rxq_index, u32 flow_id) { struct efx_nic *efx = netdev_priv(net_dev); - struct efx_channel *channel; - struct efx_filter_spec spec; + struct efx_async_filter_insertion *req; struct flow_keys fk; - int rc; if (flow_id == RPS_FLOW_ID_INVALID) return -EINVAL; @@ -847,50 +900,39 @@ int efx_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb, if (fk.control.flags & FLOW_DIS_IS_FRAGMENT) return -EPROTONOSUPPORT; - efx_filter_init_rx(&spec, EFX_FILTER_PRI_HINT, + req = kmalloc(sizeof(*req), GFP_ATOMIC); + if (!req) + return -ENOMEM; + + efx_filter_init_rx(&req->spec, EFX_FILTER_PRI_HINT, efx->rx_scatter ? EFX_FILTER_FLAG_RX_SCATTER : 0, rxq_index); - spec.match_flags = + req->spec.match_flags = EFX_FILTER_MATCH_ETHER_TYPE | EFX_FILTER_MATCH_IP_PROTO | EFX_FILTER_MATCH_LOC_HOST | EFX_FILTER_MATCH_LOC_PORT | EFX_FILTER_MATCH_REM_HOST | EFX_FILTER_MATCH_REM_PORT; - spec.ether_type = fk.basic.n_proto; - spec.ip_proto = fk.basic.ip_proto; + req->spec.ether_type = fk.basic.n_proto; + req->spec.ip_proto = fk.basic.ip_proto; if (fk.basic.n_proto == htons(ETH_P_IP)) { - spec.rem_host[0] = fk.addrs.v4addrs.src; - spec.loc_host[0] = fk.addrs.v4addrs.dst; + req->spec.rem_host[0] = fk.addrs.v4addrs.src; + req->spec.loc_host[0] = fk.addrs.v4addrs.dst; } else { - memcpy(spec.rem_host, &fk.addrs.v6addrs.src, sizeof(struct in6_addr)); - memcpy(spec.loc_host, &fk.addrs.v6addrs.dst, sizeof(struct in6_addr)); + memcpy(req->spec.rem_host, &fk.addrs.v6addrs.src, + sizeof(struct in6_addr)); + memcpy(req->spec.loc_host, &fk.addrs.v6addrs.dst, + sizeof(struct in6_addr)); } - spec.rem_port = fk.ports.src; - spec.loc_port = fk.ports.dst; - - rc = efx->type->filter_rfs_insert(efx, &spec); - if (rc < 0) - return rc; + req->spec.rem_port = fk.ports.src; + req->spec.loc_port = fk.ports.dst; - /* Remember this so we can check whether to expire the filter later */ - channel = efx_get_channel(efx, rxq_index); - channel->rps_flow_id[rc] = flow_id; - ++channel->rfs_filters_added; - - if (spec.ether_type == htons(ETH_P_IP)) - netif_info(efx, rx_status, efx->net_dev, - "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n", - (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", - spec.rem_host, ntohs(spec.rem_port), spec.loc_host, - ntohs(spec.loc_port), rxq_index, flow_id, rc); - else - netif_info(efx, rx_status, efx->net_dev, - "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n", - (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP", - spec.rem_host, ntohs(spec.rem_port), spec.loc_host, - ntohs(spec.loc_port), rxq_index, flow_id, rc); - - return rc; + dev_hold(req->net_dev = net_dev); + INIT_WORK(&req->work, efx_filter_rfs_work); + req->rxq_index = rxq_index; + req->flow_id = flow_id; + schedule_work(&req->work); + return 0; } bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota) @@ -899,9 +941,8 @@ bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota) unsigned int channel_idx, index, size; u32 flow_id; - if (!spin_trylock_bh(&efx->filter_lock)) + if (!mutex_trylock(&efx->rps_mutex)) return false; - expire_one = efx->type->filter_rfs_expire_one; channel_idx = efx->rps_expire_channel; index = efx->rps_expire_index; @@ -926,7 +967,7 @@ bool __efx_filter_rfs_expire(struct efx_nic *efx, unsigned int quota) efx->rps_expire_channel = channel_idx; efx->rps_expire_index = index; - spin_unlock_bh(&efx->filter_lock); + mutex_unlock(&efx->rps_mutex); return true; } diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 18aab25234ba..65161f68265a 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -1035,7 +1035,6 @@ const struct efx_nic_type siena_a0_nic_type = { .filter_get_rx_id_limit = efx_farch_filter_get_rx_id_limit, .filter_get_rx_ids = efx_farch_filter_get_rx_ids, #ifdef CONFIG_RFS_ACCEL - .filter_rfs_insert = efx_farch_filter_rfs_insert, .filter_rfs_expire_one = efx_farch_filter_rfs_expire_one, #endif #ifdef CONFIG_SFC_MTD |