diff options
5 files changed, 238 insertions, 5 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h index 81ed06e58fea..93e911baacad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h @@ -158,6 +158,22 @@ struct mlx5e_neigh_hash_entry { enum { /* set when the encap entry is successfully offloaded into HW */ MLX5_ENCAP_ENTRY_VALID = BIT(0), + MLX5_REFORMAT_DECAP = BIT(1), +}; + +struct mlx5e_decap_key { + struct ethhdr key; +}; + +struct mlx5e_decap_entry { + struct mlx5e_decap_key key; + struct list_head flows; + struct hlist_node hlist; + refcount_t refcnt; + struct completion res_ready; + int compl_result; + struct mlx5_pkt_reformat *pkt_reformat; + struct rcu_head rcu; }; struct mlx5e_encap_entry { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 801fcd1b5f85..a6b18f0444e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -46,6 +46,7 @@ #include <net/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_pedit.h> #include <net/tc_act/tc_csum.h> +#include <net/tc_act/tc_mpls.h> #include <net/arp.h> #include <net/ipv6_stubs.h> #include <net/bareudp.h> @@ -93,6 +94,7 @@ enum { MLX5E_TC_FLOW_FLAG_NOT_READY = MLX5E_TC_FLOW_BASE + 5, MLX5E_TC_FLOW_FLAG_DELETED = MLX5E_TC_FLOW_BASE + 6, MLX5E_TC_FLOW_FLAG_CT = MLX5E_TC_FLOW_BASE + 7, + MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8, }; #define MLX5E_TC_MAX_SPLITS 1 @@ -126,6 +128,11 @@ struct mlx5e_tc_flow { u64 cookie; unsigned long flags; struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1]; + + /* flows sharing the same reformat object - currently mpls decap */ + struct list_head l3_to_l2_reformat; + struct mlx5e_decap_entry *decap_reformat; + /* Flow can be associated with multiple encap IDs. * The number of encaps is bounded by the number of supported * destinations. @@ -157,6 +164,7 @@ struct mlx5e_tc_flow_parse_attr { struct mlx5_flow_spec spec; struct mlx5e_tc_mod_hdr_acts mod_hdr_acts; int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS]; + struct ethhdr eth; }; #define MLX5E_TC_TABLE_NUM_GROUPS 4 @@ -1124,6 +1132,11 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv, struct netlink_ext_ack *extack, struct net_device **encap_dev, bool *encap_valid); +static int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack); +static void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow); static struct mlx5_flow_handle * mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw, @@ -1299,6 +1312,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, return -EOPNOTSUPP; } + if (flow_flag_test(flow, L3_TO_L2_DECAP)) { + err = mlx5e_attach_decap(priv, flow, extack); + if (err) + return err; + } + for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) { int mirred_ifindex; @@ -1408,6 +1427,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv, if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) mlx5_fc_destroy(attr->counter_dev, attr->counter); + + if (flow_flag_test(flow, L3_TO_L2_DECAP)) + mlx5e_detach_decap(priv, flow); } void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, @@ -1684,6 +1706,17 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr kfree_rcu(e, rcu); } +static void mlx5e_decap_dealloc(struct mlx5e_priv *priv, + struct mlx5e_decap_entry *d) +{ + WARN_ON(!list_empty(&d->flows)); + + if (!d->compl_result) + mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat); + + kfree_rcu(d, rcu); +} + void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -1696,6 +1729,18 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e) mlx5e_encap_dealloc(priv, e); } +static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + + if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock)) + return; + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + static void mlx5e_detach_encap(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow, int out_index) { @@ -1719,6 +1764,29 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv, mlx5e_encap_dealloc(priv, e); } +static void mlx5e_detach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_entry *d = flow->decap_reformat; + + if (!d) + return; + + mutex_lock(&esw->offloads.decap_tbl_lock); + list_del(&flow->l3_to_l2_reformat); + flow->decap_reformat = NULL; + + if (!refcount_dec_and_test(&d->refcnt)) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + return; + } + hash_del_rcu(&d->hlist); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + mlx5e_decap_dealloc(priv, d); +} + static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow) { struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch; @@ -1990,7 +2058,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv, return err; } - flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; + /* With mpls over udp we decapsulate using packet reformat + * object + */ + if (!netif_is_bareudp(filter_dev)) + flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP; } if (!needs_mapping && !sets_mapping) @@ -3285,12 +3357,22 @@ static inline int cmp_encap_info(struct encap_key *a, a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type; } +static inline int cmp_decap_info(struct mlx5e_decap_key *a, + struct mlx5e_decap_key *b) +{ + return memcmp(&a->key, &b->key, sizeof(b->key)); +} + static inline int hash_encap_info(struct encap_key *key) { return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key), key->tc_tunnel->tunnel_type); } +static inline int hash_decap_info(struct mlx5e_decap_key *key) +{ + return jhash(&key->key, sizeof(key->key), 0); +} static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, struct net_device *peer_netdev) @@ -3305,13 +3387,16 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv, same_hw_devs(priv, peer_priv)); } - - bool mlx5e_encap_take(struct mlx5e_encap_entry *e) { return refcount_inc_not_zero(&e->refcnt); } +static bool mlx5e_decap_take(struct mlx5e_decap_entry *e) +{ + return refcount_inc_not_zero(&e->refcnt); +} + static struct mlx5e_encap_entry * mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, uintptr_t hash_key) @@ -3332,6 +3417,24 @@ mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key, return NULL; } +static struct mlx5e_decap_entry * +mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key, + uintptr_t hash_key) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5e_decap_key r_key; + struct mlx5e_decap_entry *e; + + hash_for_each_possible_rcu(esw->offloads.decap_tbl, e, + hlist, hash_key) { + r_key = e->key; + if (!cmp_decap_info(&r_key, key) && + mlx5e_decap_take(e)) + return e; + } + return NULL; +} + static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info) { size_t tun_size = sizeof(*tun_info) + tun_info->options_len; @@ -3477,6 +3580,84 @@ out_err_init: return err; } +static int mlx5e_attach_decap(struct mlx5e_priv *priv, + struct mlx5e_tc_flow *flow, + struct netlink_ext_ack *extack) +{ + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + struct mlx5_esw_flow_attr *attr = flow->esw_attr; + struct mlx5e_tc_flow_parse_attr *parse_attr; + struct mlx5e_decap_entry *d; + struct mlx5e_decap_key key; + uintptr_t hash_key; + int err; + + parse_attr = attr->parse_attr; + if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) { + NL_SET_ERR_MSG_MOD(extack, + "encap header larger than max supported"); + return -EOPNOTSUPP; + } + + key.key = parse_attr->eth; + hash_key = hash_decap_info(&key); + mutex_lock(&esw->offloads.decap_tbl_lock); + d = mlx5e_decap_get(priv, &key, hash_key); + if (d) { + mutex_unlock(&esw->offloads.decap_tbl_lock); + wait_for_completion(&d->res_ready); + mutex_lock(&esw->offloads.decap_tbl_lock); + if (d->compl_result) { + err = -EREMOTEIO; + goto out_free; + } + goto found; + } + + d = kzalloc(sizeof(*d), GFP_KERNEL); + if (!d) { + err = -ENOMEM; + goto out_err; + } + + d->key = key; + refcount_set(&d->refcnt, 1); + init_completion(&d->res_ready); + INIT_LIST_HEAD(&d->flows); + hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key); + mutex_unlock(&esw->offloads.decap_tbl_lock); + + d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev, + MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2, + sizeof(parse_attr->eth), + &parse_attr->eth, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(d->pkt_reformat)) { + err = PTR_ERR(d->pkt_reformat); + d->compl_result = err; + } + mutex_lock(&esw->offloads.decap_tbl_lock); + complete_all(&d->res_ready); + if (err) + goto out_free; + +found: + flow->decap_reformat = d; + attr->decap_pkt_reformat = d->pkt_reformat; + list_add(&flow->l3_to_l2_reformat, &d->flows); + mutex_unlock(&esw->offloads.decap_tbl_lock); + return 0; + +out_free: + mutex_unlock(&esw->offloads.decap_tbl_lock); + mlx5e_decap_put(priv, d); + return err; + +out_err: + mutex_unlock(&esw->offloads.decap_tbl_lock); + return err; +} + static int parse_tc_vlan_action(struct mlx5e_priv *priv, const struct flow_action_entry *act, struct mlx5_esw_flow_attr *attr, @@ -3688,7 +3869,8 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv, static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct flow_action *flow_action, struct mlx5e_tc_flow *flow, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + struct net_device *filter_dev) { struct pedit_headers_action hdrs[2] = {}; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; @@ -3727,8 +3909,32 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, } mpls_push = true; break; + case FLOW_ACTION_MPLS_POP: + /* we only support mpls pop if it is the first action + * and the filter net device is bareudp. Subsequent + * actions can be pedit and the last can be mirred + * egress redirect. + */ + if (i) { + NL_SET_ERR_MSG_MOD(extack, + "mpls pop supported only as first action"); + return -EOPNOTSUPP; + } + if (!netif_is_bareudp(filter_dev)) { + NL_SET_ERR_MSG_MOD(extack, + "mpls pop supported only on bareudp devices"); + return -EOPNOTSUPP; + } + + parse_attr->eth.h_proto = act->mpls_pop.proto; + action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_flag_set(flow, L3_TO_L2_DECAP); + break; case FLOW_ACTION_MANGLE: case FLOW_ACTION_ADD: + if (flow_flag_test(flow, L3_TO_L2_DECAP)) + return -EOPNOTSUPP; + err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB, hdrs, extack); if (err) @@ -4093,6 +4299,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size, INIT_LIST_HEAD(&flow->encaps[out_index].list); INIT_LIST_HEAD(&flow->mod_hdr); INIT_LIST_HEAD(&flow->hairpin); + INIT_LIST_HEAD(&flow->l3_to_l2_reformat); refcount_set(&flow->refcnt, 1); init_completion(&flow->init_done); @@ -4162,7 +4369,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv, if (err) goto err_free; - err = parse_tc_fdb_actions(priv, &rule->action, flow, extack); + err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev); if (err) goto err_free; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index c5eb4e7754a9..ac79b7c9aeb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2262,6 +2262,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) hash_init(esw->offloads.encap_tbl); mutex_init(&esw->offloads.mod_hdr.lock); hash_init(esw->offloads.mod_hdr.hlist); + mutex_init(&esw->offloads.decap_tbl_lock); + hash_init(esw->offloads.decap_tbl); atomic64_set(&esw->offloads.num_flows, 0); mutex_init(&esw->state_lock); mutex_init(&esw->mode_lock); @@ -2303,6 +2305,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) mutex_destroy(&esw->state_lock); mutex_destroy(&esw->offloads.mod_hdr.lock); mutex_destroy(&esw->offloads.encap_tbl_lock); + mutex_destroy(&esw->offloads.decap_tbl_lock); kfree(esw->vports); kfree(esw); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 4a1c6c78bb14..ccbbea3e0505 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -209,6 +209,8 @@ struct mlx5_esw_offload { struct mutex peer_mutex; struct mutex encap_tbl_lock; /* protects encap_tbl */ DECLARE_HASHTABLE(encap_tbl, 8); + struct mutex decap_tbl_lock; /* protects decap_tbl */ + DECLARE_HASHTABLE(decap_tbl, 8); struct mod_hdr_tbl mod_hdr; DECLARE_HASHTABLE(termtbl_tbl, 8); struct mutex termtbl_mutex; /* protects termtbl hash */ @@ -432,6 +434,7 @@ struct mlx5_esw_flow_attr { struct mlx5_flow_table *fdb; struct mlx5_flow_table *dest_ft; struct mlx5_ct_attr ct_attr; + struct mlx5_pkt_reformat *decap_pkt_reformat; struct mlx5e_tc_flow_parse_attr *parse_attr; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 1c9be19ee025..554fc64d8ef6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -366,6 +366,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } } } + + if (attr->decap_pkt_reformat) + flow_act.pkt_reformat = attr->decap_pkt_reformat; + if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; dest[i].counter_id = mlx5_fc_id(attr->counter); |