From 6749d59016981bca6d7000e40bdb08eed78dfa6f Mon Sep 17 00:00:00 2001 From: John Hurley Date: Tue, 23 Jul 2019 15:33:59 +0100 Subject: net: sched: include mpls actions in hardware intermediate representation A recent addition to TC actions is the ability to manipulate the MPLS headers on packets. In preparation to offload such actions to hardware, update the IR code to accept and prepare the new actions. Note that no driver currently implements the MPLS dec_ttl action so this is not included. Signed-off-by: John Hurley Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/cls_api.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'net/sched/cls_api.c') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index efd3cfb80a2a..3565d9aa09aa 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -36,6 +36,7 @@ #include #include #include +#include extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -3269,6 +3270,30 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); entry->ct.zone = tcf_ct_zone(act); + } else if (is_tcf_mpls(act)) { + switch (tcf_mpls_action(act)) { + case TCA_MPLS_ACT_PUSH: + entry->id = FLOW_ACTION_MPLS_PUSH; + entry->mpls_push.proto = tcf_mpls_proto(act); + entry->mpls_push.label = tcf_mpls_label(act); + entry->mpls_push.tc = tcf_mpls_tc(act); + entry->mpls_push.bos = tcf_mpls_bos(act); + entry->mpls_push.ttl = tcf_mpls_ttl(act); + break; + case TCA_MPLS_ACT_POP: + entry->id = FLOW_ACTION_MPLS_POP; + entry->mpls_pop.proto = tcf_mpls_proto(act); + break; + case TCA_MPLS_ACT_MODIFY: + entry->id = FLOW_ACTION_MPLS_MANGLE; + entry->mpls_mangle.label = tcf_mpls_label(act); + entry->mpls_mangle.tc = tcf_mpls_tc(act); + entry->mpls_mangle.bos = tcf_mpls_bos(act); + entry->mpls_mangle.ttl = tcf_mpls_ttl(act); + break; + default: + goto err_out; + } } else { goto err_out; } -- cgit v1.2.3 From 
fb1b775a247ee8d846152841f780eba6cb71bcfc Mon Sep 17 00:00:00 2001 From: John Hurley Date: Sun, 4 Aug 2019 16:09:04 +0100 Subject: net: sched: add skbedit of ptype action to hardware IR TC rules can implement skbedit actions. Currently actions that modify the skb mark are passed to offloading drivers via the hardware intermediate representation in the flow_offload API. Extend this to include skbedit actions that modify the packet type of the skb. Such actions may be used to set the ptype to HOST when redirecting a packet to ingress. Signed-off-by: John Hurley Reviewed-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/flow_offload.h | 2 ++ net/sched/cls_api.c | 3 +++ 2 files changed, 5 insertions(+) (limited to 'net/sched/cls_api.c') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 00b9aab5fdc1..04c29f5bb60a 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -126,6 +126,7 @@ enum flow_action_id { FLOW_ACTION_ADD, FLOW_ACTION_CSUM, FLOW_ACTION_MARK, + FLOW_ACTION_PTYPE, FLOW_ACTION_WAKE, FLOW_ACTION_QUEUE, FLOW_ACTION_SAMPLE, @@ -168,6 +169,7 @@ struct flow_action_entry { const struct ip_tunnel_info *tunnel; /* FLOW_ACTION_TUNNEL_ENCAP */ u32 csum_flags; /* FLOW_ACTION_CSUM */ u32 mark; /* FLOW_ACTION_MARK */ + u16 ptype; /* FLOW_ACTION_PTYPE */ struct { /* FLOW_ACTION_QUEUE */ u32 ctx; u32 index; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3565d9aa09aa..ae73d3705571 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3294,6 +3294,9 @@ int tc_setup_flow_action(struct flow_action *flow_action, default: goto err_out; } + } else if (is_tcf_skbedit_ptype(act)) { + entry->id = FLOW_ACTION_PTYPE; + entry->ptype = tcf_skbedit_ptype(act); } else { goto err_out; } -- cgit v1.2.3 From 48e584ac583b08a923d4d872596cc7b049e99f12 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Sun, 4 Aug 2019 16:09:06 +0100 Subject: net: sched: add ingress mirred action to 
hardware IR TC mirred actions (redirect and mirror) can send to egress or ingress of a device. Currently only egress is used for hw offload rules. Modify the intermediate representation for hw offload to include mirred actions that go to ingress. This gives drivers access to such rules so that they can decide whether or not to offload them. Signed-off-by: John Hurley Reviewed-by: Simon Horman Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/flow_offload.h | 2 ++ net/sched/cls_api.c | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'net/sched/cls_api.c') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 04c29f5bb60a..d3b12bc8a114 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -117,6 +117,8 @@ enum flow_action_id { FLOW_ACTION_GOTO, FLOW_ACTION_REDIRECT, FLOW_ACTION_MIRRED, + FLOW_ACTION_REDIRECT_INGRESS, + FLOW_ACTION_MIRRED_INGRESS, FLOW_ACTION_VLAN_PUSH, FLOW_ACTION_VLAN_POP, FLOW_ACTION_VLAN_MANGLE, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ae73d3705571..9d85d3295c7c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3205,6 +3205,12 @@ int tc_setup_flow_action(struct flow_action *flow_action, } else if (is_tcf_mirred_egress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED; entry->dev = tcf_mirred_dev(act); + } else if (is_tcf_mirred_ingress_redirect(act)) { + entry->id = FLOW_ACTION_REDIRECT_INGRESS; + entry->dev = tcf_mirred_dev(act); + } else if (is_tcf_mirred_ingress_mirror(act)) { + entry->id = FLOW_ACTION_MIRRED_INGRESS; + entry->dev = tcf_mirred_dev(act); } else if (is_tcf_vlan(act)) { switch (tcf_vlan_action(act)) { case TCA_VLAN_ACT_PUSH: -- cgit v1.2.3 From 242453c227d14751fed0a8809a58f1bf3c7d837a Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 7 Aug 2019 09:13:49 +0800 Subject: cls_api: modify the tc_indr_block_ing_cmd parameters. This patch changes tc_indr_block_ing_cmd so that it no longer accesses struct tc_indr_block_dev and tc_indr_block_cb. 
Signed-off-by: wenxu Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/cls_api.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 9d85d3295c7c..1dd210d353b7 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -677,26 +677,28 @@ static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb) static int tcf_block_setup(struct tcf_block *block, struct flow_block_offload *bo); -static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev, - struct tc_indr_block_cb *indr_block_cb, +static void tc_indr_block_ing_cmd(struct net_device *dev, + struct tcf_block *block, + tc_indr_block_bind_cb_t *cb, + void *cb_priv, enum flow_block_command command) { struct flow_block_offload bo = { .command = command, .binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS, - .net = dev_net(indr_dev->dev), - .block_shared = tcf_block_non_null_shared(indr_dev->block), + .net = dev_net(dev), + .block_shared = tcf_block_non_null_shared(block), }; INIT_LIST_HEAD(&bo.cb_list); - if (!indr_dev->block) + if (!block) return; - bo.block = &indr_dev->block->flow_block; + bo.block = &block->flow_block; - indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, - &bo); - tcf_block_setup(indr_dev->block, &bo); + cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); + + tcf_block_setup(block, &bo); } int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, @@ -715,7 +717,8 @@ int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, if (err) goto err_dev_put; - tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND); + tc_indr_block_ing_cmd(dev, indr_dev->block, cb, cb_priv, + FLOW_BLOCK_BIND); return 0; err_dev_put: @@ -752,7 +755,8 @@ void __tc_indr_block_cb_unregister(struct net_device *dev, return; /* Send unbind message if required to free any block cbs. 
*/ - tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND); + tc_indr_block_ing_cmd(dev, indr_dev->block, cb, indr_block_cb->cb_priv, + FLOW_BLOCK_UNBIND); tc_indr_block_cb_del(indr_block_cb); tc_indr_block_dev_put(indr_dev); } -- cgit v1.2.3 From f8436988574e50fe69f6f193ee76c3185b17a5c7 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 7 Aug 2019 09:13:50 +0800 Subject: cls_api: remove the tcf_block cache Remove the tcf_block in the tc_indr_block_dev for multi-subsystem support. Signed-off-by: wenxu Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/cls_api.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1dd210d353b7..12eaa6c90e79 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -574,7 +574,6 @@ struct tc_indr_block_dev { struct net_device *dev; unsigned int refcnt; struct list_head cb_list; - struct tcf_block *block; }; struct tc_indr_block_cb { @@ -611,7 +610,6 @@ static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev) INIT_LIST_HEAD(&indr_dev->cb_list); indr_dev->dev = dev; - indr_dev->block = tc_dev_ingress_block(dev); if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node, tc_indr_setup_block_ht_params)) { kfree(indr_dev); @@ -706,6 +704,7 @@ int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, { struct tc_indr_block_cb *indr_block_cb; struct tc_indr_block_dev *indr_dev; + struct tcf_block *block; int err; indr_dev = tc_indr_block_dev_get(dev); @@ -717,8 +716,9 @@ int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, if (err) goto err_dev_put; - tc_indr_block_ing_cmd(dev, indr_dev->block, cb, cb_priv, - FLOW_BLOCK_BIND); + block = tc_dev_ingress_block(dev); + tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb, + indr_block_cb->cb_priv, FLOW_BLOCK_BIND); return 0; err_dev_put: @@ -745,6 +745,7 @@ void 
__tc_indr_block_cb_unregister(struct net_device *dev, { struct tc_indr_block_cb *indr_block_cb; struct tc_indr_block_dev *indr_dev; + struct tcf_block *block; indr_dev = tc_indr_block_dev_lookup(dev); if (!indr_dev) @@ -755,8 +756,9 @@ void __tc_indr_block_cb_unregister(struct net_device *dev, return; /* Send unbind message if required to free any block cbs. */ - tc_indr_block_ing_cmd(dev, indr_dev->block, cb, indr_block_cb->cb_priv, - FLOW_BLOCK_UNBIND); + block = tc_dev_ingress_block(dev); + tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb, + indr_block_cb->cb_priv, FLOW_BLOCK_UNBIND); tc_indr_block_cb_del(indr_block_cb); tc_indr_block_dev_put(indr_dev); } @@ -792,8 +794,6 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, if (!indr_dev) return; - indr_dev->block = command == FLOW_BLOCK_BIND ? block : NULL; - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, &bo); -- cgit v1.2.3 From e4da91021153b78b9f2972212610bc71263925f4 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 7 Aug 2019 09:13:51 +0800 Subject: cls_api: add flow_indr_block_call function This patch makes indr_block_call stop accessing struct tc_indr_block_cb and tc_indr_block_dev directly. Signed-off-by: wenxu Acked-by: Jakub Kicinski Signed-off-by: David S. 
Miller --- net/sched/cls_api.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 12eaa6c90e79..7c34fc6851c3 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -773,13 +773,27 @@ void tc_indr_block_cb_unregister(struct net_device *dev, } EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister); +static void flow_indr_block_call(struct net_device *dev, + struct flow_block_offload *bo, + enum flow_block_command command) +{ + struct tc_indr_block_cb *indr_block_cb; + struct tc_indr_block_dev *indr_dev; + + indr_dev = tc_indr_block_dev_lookup(dev); + if (!indr_dev) + return; + + list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) + indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, + bo); +} + static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, struct tcf_block_ext_info *ei, enum flow_block_command command, struct netlink_ext_ack *extack) { - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; struct flow_block_offload bo = { .command = command, .binder_type = ei->binder_type, @@ -790,14 +804,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, }; INIT_LIST_HEAD(&bo.cb_list); - indr_dev = tc_indr_block_dev_lookup(dev); - if (!indr_dev) - return; - - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) - indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, - &bo); - + flow_indr_block_call(dev, &bo, command); tcf_block_setup(block, &bo); } -- cgit v1.2.3 From 4e481908c51bf02457aecdedc2d80e1be22e0146 Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 7 Aug 2019 09:13:52 +0800 Subject: flow_offload: move tc indirect block to flow offload Move tc indirect block to flow_offload and rename it to flow indirect block. This allows nf_tables to use the indr block architecture. 
Signed-off-by: wenxu Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 10 +- .../net/ethernet/netronome/nfp/flower/offload.c | 11 +- include/net/flow_offload.h | 29 +++ include/net/pkt_cls.h | 35 --- include/net/sch_generic.h | 3 - net/core/flow_offload.c | 215 ++++++++++++++++++ net/sched/cls_api.c | 240 +++------------------ 7 files changed, 280 insertions(+), 263 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index fbb9de633578..b7f113e996e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -781,9 +781,9 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, { int err; - err = __tc_indr_block_cb_register(netdev, rpriv, - mlx5e_rep_indr_setup_tc_cb, - rpriv); + err = __flow_indr_block_cb_register(netdev, rpriv, + mlx5e_rep_indr_setup_tc_cb, + rpriv); if (err) { struct mlx5e_priv *priv = netdev_priv(rpriv->netdev); @@ -796,8 +796,8 @@ static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv, static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv, struct net_device *netdev) { - __tc_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, - rpriv); + __flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_tc_cb, + rpriv); } static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb, diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index ff8a9f1a38f8..3a4f4f042ae7 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -1649,16 +1649,17 @@ int nfp_flower_reg_indir_block_handler(struct nfp_app *app, return NOTIFY_OK; if (event == NETDEV_REGISTER) { - err = __tc_indr_block_cb_register(netdev, app, - 
nfp_flower_indr_setup_tc_cb, - app); + err = __flow_indr_block_cb_register(netdev, app, + nfp_flower_indr_setup_tc_cb, + app); if (err) nfp_flower_cmsg_warn(app, "Indirect block reg failed - %s\n", netdev->name); } else if (event == NETDEV_UNREGISTER) { - __tc_indr_block_cb_unregister(netdev, - nfp_flower_indr_setup_tc_cb, app); + __flow_indr_block_cb_unregister(netdev, + nfp_flower_indr_setup_tc_cb, + app); } return NOTIFY_OK; diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index d3b12bc8a114..46b8777ad05d 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -4,6 +4,7 @@ #include #include #include +#include struct flow_match { struct flow_dissector *dissector; @@ -370,4 +371,32 @@ static inline void flow_block_init(struct flow_block *flow_block) INIT_LIST_HEAD(&flow_block->cb_list); } +typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, + enum tc_setup_type type, void *type_data); + +typedef void flow_indr_block_ing_cmd_t(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command); + +int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, + void *cb_ident); + +void __flow_indr_block_cb_unregister(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_ident); + +int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, void *cb_ident); + +void flow_indr_block_cb_unregister(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_ident); + +void flow_indr_block_call(struct net_device *dev, + flow_indr_block_ing_cmd_t *cb, + struct flow_block_offload *bo, + enum flow_block_command command); + #endif /* _NET_FLOW_OFFLOAD_H */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index e429809ca90d..0790a4ed909c 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -70,15 +70,6 @@ static inline struct Qdisc 
*tcf_block_q(struct tcf_block *block) return block->q; } -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident); -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident); -void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident); -void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident); - int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode); @@ -137,32 +128,6 @@ void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb, { } -static inline -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - return 0; -} - -static inline -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - return 0; -} - -static inline -void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ -} - -static inline -void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ -} - static inline int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res, bool compat_mode) { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6b6b01234dd9..d9f359af0b93 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -23,9 +23,6 @@ struct tcf_walker; struct module; struct bpf_flow_keys; -typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv, - enum tc_setup_type type, void *type_data); - struct qdisc_rate_table { struct tc_ratespec rate; u32 data[256]; diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index d63b970784dc..4cc18e462d50 100644 --- a/net/core/flow_offload.c +++ 
b/net/core/flow_offload.c @@ -2,6 +2,7 @@ #include #include #include +#include struct flow_rule *flow_rule_alloc(unsigned int num_actions) { @@ -280,3 +281,217 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f, } } EXPORT_SYMBOL(flow_block_cb_setup_simple); + +static struct rhashtable indr_setup_block_ht; + +struct flow_indr_block_cb { + struct list_head list; + void *cb_priv; + flow_indr_block_bind_cb_t *cb; + void *cb_ident; +}; + +struct flow_indr_block_dev { + struct rhash_head ht_node; + struct net_device *dev; + unsigned int refcnt; + flow_indr_block_ing_cmd_t *block_ing_cmd_cb; + struct list_head cb_list; +}; + +static const struct rhashtable_params flow_indr_setup_block_ht_params = { + .key_offset = offsetof(struct flow_indr_block_dev, dev), + .head_offset = offsetof(struct flow_indr_block_dev, ht_node), + .key_len = sizeof(struct net_device *), +}; + +static struct flow_indr_block_dev * +flow_indr_block_dev_lookup(struct net_device *dev) +{ + return rhashtable_lookup_fast(&indr_setup_block_ht, &dev, + flow_indr_setup_block_ht_params); +} + +static struct flow_indr_block_dev * +flow_indr_block_dev_get(struct net_device *dev) +{ + struct flow_indr_block_dev *indr_dev; + + indr_dev = flow_indr_block_dev_lookup(dev); + if (indr_dev) + goto inc_ref; + + indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL); + if (!indr_dev) + return NULL; + + INIT_LIST_HEAD(&indr_dev->cb_list); + indr_dev->dev = dev; + if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node, + flow_indr_setup_block_ht_params)) { + kfree(indr_dev); + return NULL; + } + +inc_ref: + indr_dev->refcnt++; + return indr_dev; +} + +static void flow_indr_block_dev_put(struct flow_indr_block_dev *indr_dev) +{ + if (--indr_dev->refcnt) + return; + + rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node, + flow_indr_setup_block_ht_params); + kfree(indr_dev); +} + +static struct flow_indr_block_cb * +flow_indr_block_cb_lookup(struct flow_indr_block_dev *indr_dev, + 
flow_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + + list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) + if (indr_block_cb->cb == cb && + indr_block_cb->cb_ident == cb_ident) + return indr_block_cb; + return NULL; +} + +static struct flow_indr_block_cb * +flow_indr_block_cb_add(struct flow_indr_block_dev *indr_dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + + indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident); + if (indr_block_cb) + return ERR_PTR(-EEXIST); + + indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL); + if (!indr_block_cb) + return ERR_PTR(-ENOMEM); + + indr_block_cb->cb_priv = cb_priv; + indr_block_cb->cb = cb; + indr_block_cb->cb_ident = cb_ident; + list_add(&indr_block_cb->list, &indr_dev->cb_list); + + return indr_block_cb; +} + +static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb) +{ + list_del(&indr_block_cb->list); + kfree(indr_block_cb); +} + +int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + struct flow_indr_block_dev *indr_dev; + int err; + + indr_dev = flow_indr_block_dev_get(dev); + if (!indr_dev) + return -ENOMEM; + + indr_block_cb = flow_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident); + err = PTR_ERR_OR_ZERO(indr_block_cb); + if (err) + goto err_dev_put; + + if (indr_dev->block_ing_cmd_cb) + indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb, + indr_block_cb->cb_priv, + FLOW_BLOCK_BIND); + + return 0; + +err_dev_put: + flow_indr_block_dev_put(indr_dev); + return err; +} +EXPORT_SYMBOL_GPL(__flow_indr_block_cb_register); + +int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + int err; + + rtnl_lock(); + err = __flow_indr_block_cb_register(dev, cb_priv, cb, cb_ident); + 
rtnl_unlock(); + + return err; +} +EXPORT_SYMBOL_GPL(flow_indr_block_cb_register); + +void __flow_indr_block_cb_unregister(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + struct flow_indr_block_dev *indr_dev; + + indr_dev = flow_indr_block_dev_lookup(dev); + if (!indr_dev) + return; + + indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident); + if (!indr_block_cb) + return; + + if (indr_dev->block_ing_cmd_cb) + indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb, + indr_block_cb->cb_priv, + FLOW_BLOCK_UNBIND); + + flow_indr_block_cb_del(indr_block_cb); + flow_indr_block_dev_put(indr_dev); +} +EXPORT_SYMBOL_GPL(__flow_indr_block_cb_unregister); + +void flow_indr_block_cb_unregister(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + rtnl_lock(); + __flow_indr_block_cb_unregister(dev, cb, cb_ident); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister); + +void flow_indr_block_call(struct net_device *dev, + flow_indr_block_ing_cmd_t cb, + struct flow_block_offload *bo, + enum flow_block_command command) +{ + struct flow_indr_block_cb *indr_block_cb; + struct flow_indr_block_dev *indr_dev; + + indr_dev = flow_indr_block_dev_lookup(dev); + if (!indr_dev) + return; + + indr_dev->block_ing_cmd_cb = command == FLOW_BLOCK_BIND + ? 
cb : NULL; + + list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) + indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, + bo); +} +EXPORT_SYMBOL_GPL(flow_indr_block_call); + +static int __init init_flow_indr_rhashtable(void) +{ + return rhashtable_init(&indr_setup_block_ht, + &flow_indr_setup_block_ht_params); +} +subsys_initcall(init_flow_indr_rhashtable); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 7c34fc6851c3..0b0dde26783d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -37,6 +37,7 @@ #include #include #include +#include extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -545,139 +546,12 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) } } -static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) -{ - const struct Qdisc_class_ops *cops; - struct Qdisc *qdisc; - - if (!dev_ingress_queue(dev)) - return NULL; - - qdisc = dev_ingress_queue(dev)->qdisc_sleeping; - if (!qdisc) - return NULL; - - cops = qdisc->ops->cl_ops; - if (!cops) - return NULL; - - if (!cops->tcf_block) - return NULL; - - return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); -} - -static struct rhashtable indr_setup_block_ht; - -struct tc_indr_block_dev { - struct rhash_head ht_node; - struct net_device *dev; - unsigned int refcnt; - struct list_head cb_list; -}; - -struct tc_indr_block_cb { - struct list_head list; - void *cb_priv; - tc_indr_block_bind_cb_t *cb; - void *cb_ident; -}; - -static const struct rhashtable_params tc_indr_setup_block_ht_params = { - .key_offset = offsetof(struct tc_indr_block_dev, dev), - .head_offset = offsetof(struct tc_indr_block_dev, ht_node), - .key_len = sizeof(struct net_device *), -}; - -static struct tc_indr_block_dev * -tc_indr_block_dev_lookup(struct net_device *dev) -{ - return rhashtable_lookup_fast(&indr_setup_block_ht, &dev, - tc_indr_setup_block_ht_params); -} - -static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev) -{ - struct 
tc_indr_block_dev *indr_dev; - - indr_dev = tc_indr_block_dev_lookup(dev); - if (indr_dev) - goto inc_ref; - - indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL); - if (!indr_dev) - return NULL; - - INIT_LIST_HEAD(&indr_dev->cb_list); - indr_dev->dev = dev; - if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node, - tc_indr_setup_block_ht_params)) { - kfree(indr_dev); - return NULL; - } - -inc_ref: - indr_dev->refcnt++; - return indr_dev; -} - -static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev) -{ - if (--indr_dev->refcnt) - return; - - rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node, - tc_indr_setup_block_ht_params); - kfree(indr_dev); -} - -static struct tc_indr_block_cb * -tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) - if (indr_block_cb->cb == cb && - indr_block_cb->cb_ident == cb_ident) - return indr_block_cb; - return NULL; -} - -static struct tc_indr_block_cb * -tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - - indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); - if (indr_block_cb) - return ERR_PTR(-EEXIST); - - indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL); - if (!indr_block_cb) - return ERR_PTR(-ENOMEM); - - indr_block_cb->cb_priv = cb_priv; - indr_block_cb->cb = cb; - indr_block_cb->cb_ident = cb_ident; - list_add(&indr_block_cb->list, &indr_dev->cb_list); - - return indr_block_cb; -} - -static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb) -{ - list_del(&indr_block_cb->list); - kfree(indr_block_cb); -} - static int tcf_block_setup(struct tcf_block *block, struct flow_block_offload *bo); static void tc_indr_block_ing_cmd(struct net_device *dev, struct tcf_block *block, - 
tc_indr_block_bind_cb_t *cb, + flow_indr_block_bind_cb_t *cb, void *cb_priv, enum flow_block_command command) { @@ -699,97 +573,40 @@ static void tc_indr_block_ing_cmd(struct net_device *dev, tcf_block_setup(block, &bo); } -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; - struct tcf_block *block; - int err; - - indr_dev = tc_indr_block_dev_get(dev); - if (!indr_dev) - return -ENOMEM; - - indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident); - err = PTR_ERR_OR_ZERO(indr_block_cb); - if (err) - goto err_dev_put; - - block = tc_dev_ingress_block(dev); - tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb, - indr_block_cb->cb_priv, FLOW_BLOCK_BIND); - return 0; - -err_dev_put: - tc_indr_block_dev_put(indr_dev); - return err; -} -EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register); - -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) +static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) { - int err; - - rtnl_lock(); - err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident); - rtnl_unlock(); - - return err; -} -EXPORT_SYMBOL_GPL(tc_indr_block_cb_register); + const struct Qdisc_class_ops *cops; + struct Qdisc *qdisc; -void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; - struct tcf_block *block; + if (!dev_ingress_queue(dev)) + return NULL; - indr_dev = tc_indr_block_dev_lookup(dev); - if (!indr_dev) - return; + qdisc = dev_ingress_queue(dev)->qdisc_sleeping; + if (!qdisc) + return NULL; - indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); - if (!indr_block_cb) - return; + cops = qdisc->ops->cl_ops; + if (!cops) + return NULL; - /* Send unbind message if required to free 
any block cbs. */ - block = tc_dev_ingress_block(dev); - tc_indr_block_ing_cmd(dev, block, indr_block_cb->cb, - indr_block_cb->cb_priv, FLOW_BLOCK_UNBIND); - tc_indr_block_cb_del(indr_block_cb); - tc_indr_block_dev_put(indr_dev); -} -EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister); + if (!cops->tcf_block) + return NULL; -void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - rtnl_lock(); - __tc_indr_block_cb_unregister(dev, cb, cb_ident); - rtnl_unlock(); + return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); } -EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister); -static void flow_indr_block_call(struct net_device *dev, - struct flow_block_offload *bo, - enum flow_block_command command) +static void tc_indr_block_get_and_ing_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command) { - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; - - indr_dev = tc_indr_block_dev_lookup(dev); - if (!indr_dev) - return; + struct tcf_block *block = tc_dev_ingress_block(dev); - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) - indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, - bo); + tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command); } -static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, +static void tc_indr_block_call(struct tcf_block *block, + struct net_device *dev, struct tcf_block_ext_info *ei, enum flow_block_command command, struct netlink_ext_ack *extack) @@ -804,7 +621,7 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, }; INIT_LIST_HEAD(&bo.cb_list); - flow_indr_block_call(dev, &bo, command); + flow_indr_block_call(dev, tc_indr_block_get_and_ing_cmd, &bo, command); tcf_block_setup(block, &bo); } @@ -3378,11 +3195,6 @@ static int __init tc_filter_init(void) if (err) goto err_register_pernet_subsys; - err = rhashtable_init(&indr_setup_block_ht, - 
&tc_indr_setup_block_ht_params); - if (err) - goto err_rhash_setup_block_ht; - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, @@ -3396,8 +3208,6 @@ static int __init tc_filter_init(void) return 0; -err_rhash_setup_block_ht: - unregister_pernet_subsys(&tcf_net_ops); err_register_pernet_subsys: destroy_workqueue(tc_filter_wq); return err; -- cgit v1.2.3 From 1150ab0f1b333ca310431dac65d8fa403b8471da Mon Sep 17 00:00:00 2001 From: wenxu Date: Wed, 7 Aug 2019 09:13:53 +0800 Subject: flow_offload: support get multi-subsystem block It provides a callback list to find the blocks of tc and nft subsystems Signed-off-by: wenxu Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/flow_offload.h | 10 ++++++++- net/core/flow_offload.c | 51 ++++++++++++++++++++++++++++++++++------------ net/sched/cls_api.c | 9 +++++++- 3 files changed, 55 insertions(+), 15 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 46b8777ad05d..e8069b6c474c 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -379,6 +379,15 @@ typedef void flow_indr_block_ing_cmd_t(struct net_device *dev, void *cb_priv, enum flow_block_command command); +struct flow_indr_block_ing_entry { + flow_indr_block_ing_cmd_t *cb; + struct list_head list; +}; + +void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry); + +void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry); + int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, flow_indr_block_bind_cb_t *cb, void *cb_ident); @@ -395,7 +404,6 @@ void flow_indr_block_cb_unregister(struct net_device *dev, void *cb_ident); void flow_indr_block_call(struct net_device *dev, - flow_indr_block_ing_cmd_t *cb, struct flow_block_offload *bo, enum flow_block_command command); diff --git a/net/core/flow_offload.c 
b/net/core/flow_offload.c index 4cc18e462d50..64c3d4d72b9c 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -3,6 +3,7 @@ #include #include #include +#include struct flow_rule *flow_rule_alloc(unsigned int num_actions) { @@ -282,6 +283,8 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f, } EXPORT_SYMBOL(flow_block_cb_setup_simple); +static LIST_HEAD(block_ing_cb_list); + static struct rhashtable indr_setup_block_ht; struct flow_indr_block_cb { @@ -295,7 +298,6 @@ struct flow_indr_block_dev { struct rhash_head ht_node; struct net_device *dev; unsigned int refcnt; - flow_indr_block_ing_cmd_t *block_ing_cmd_cb; struct list_head cb_list; }; @@ -389,6 +391,20 @@ static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb) kfree(indr_block_cb); } +static void flow_block_ing_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command) +{ + struct flow_indr_block_ing_entry *entry; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &block_ing_cb_list, list) { + entry->cb(dev, cb, cb_priv, command); + } + rcu_read_unlock(); +} + int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, flow_indr_block_bind_cb_t *cb, void *cb_ident) @@ -406,10 +422,8 @@ int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, if (err) goto err_dev_put; - if (indr_dev->block_ing_cmd_cb) - indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb, - indr_block_cb->cb_priv, - FLOW_BLOCK_BIND); + flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv, + FLOW_BLOCK_BIND); return 0; @@ -448,10 +462,8 @@ void __flow_indr_block_cb_unregister(struct net_device *dev, if (!indr_block_cb) return; - if (indr_dev->block_ing_cmd_cb) - indr_dev->block_ing_cmd_cb(dev, indr_block_cb->cb, - indr_block_cb->cb_priv, - FLOW_BLOCK_UNBIND); + flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv, + FLOW_BLOCK_UNBIND); flow_indr_block_cb_del(indr_block_cb); 
flow_indr_block_dev_put(indr_dev); @@ -469,7 +481,6 @@ void flow_indr_block_cb_unregister(struct net_device *dev, EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister); void flow_indr_block_call(struct net_device *dev, - flow_indr_block_ing_cmd_t cb, struct flow_block_offload *bo, enum flow_block_command command) { @@ -480,15 +491,29 @@ void flow_indr_block_call(struct net_device *dev, if (!indr_dev) return; - indr_dev->block_ing_cmd_cb = command == FLOW_BLOCK_BIND - ? cb : NULL; - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, bo); } EXPORT_SYMBOL_GPL(flow_indr_block_call); +static DEFINE_MUTEX(flow_indr_block_ing_cb_lock); +void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry) +{ + mutex_lock(&flow_indr_block_ing_cb_lock); + list_add_tail_rcu(&entry->list, &block_ing_cb_list); + mutex_unlock(&flow_indr_block_ing_cb_lock); +} +EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb); + +void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry) +{ + mutex_lock(&flow_indr_block_ing_cb_lock); + list_del_rcu(&entry->list); + mutex_unlock(&flow_indr_block_ing_cb_lock); +} +EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb); + static int __init init_flow_indr_rhashtable(void) { return rhashtable_init(&indr_setup_block_ht, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 0b0dde26783d..e0d8b456e9f5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -621,7 +621,7 @@ static void tc_indr_block_call(struct tcf_block *block, }; INIT_LIST_HEAD(&bo.cb_list); - flow_indr_block_call(dev, tc_indr_block_get_and_ing_cmd, &bo, command); + flow_indr_block_call(dev, &bo, command); tcf_block_setup(block, &bo); } @@ -3183,6 +3183,11 @@ static struct pernet_operations tcf_net_ops = { .size = sizeof(struct tcf_net), }; +static struct flow_indr_block_ing_entry block_ing_entry = { + .cb = tc_indr_block_get_and_ing_cmd, + .list = LIST_HEAD_INIT(block_ing_entry.list), +}; + static 
int __init tc_filter_init(void) { int err; @@ -3195,6 +3200,8 @@ static int __init tc_filter_init(void) if (err) goto err_register_pernet_subsys; + flow_indr_add_block_ing_cb(&block_ing_entry); + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, -- cgit v1.2.3 From 4f8116c85057239ff37519debdd5d45b38ad8130 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 26 Aug 2019 16:44:57 +0300 Subject: net: sched: protect block offload-related fields with rw_semaphore In order to remove dependency on rtnl lock, extend tcf_block with 'cb_lock' rwsem and use it to protect flow_block->cb_list and related counters from concurrent modification. The lock is taken in read mode for read-only traversal of cb_list in tc_setup_cb_call() and write mode in all other cases. This approach ensures that: - cb_list is not changed concurrently while filters are being offloaded on block. - block->nooffloaddevcnt is checked while holding the lock in read mode, but is only changed by bind/unbind code when holding the cb_lock in write mode to prevent concurrent modification. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 2 ++ net/sched/cls_api.c | 45 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 38 insertions(+), 9 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d9f359af0b93..a3eaf5f9d28f 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -396,6 +397,7 @@ struct tcf_block { refcount_t refcnt; struct net *net; struct Qdisc *q; + struct rw_semaphore cb_lock; /* protects cb_list and offload counters */ struct flow_block flow_block; struct list_head owner_list; bool keep_dst; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e0d8b456e9f5..959b7ca1ca02 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -568,9 +568,11 @@ static void tc_indr_block_ing_cmd(struct net_device *dev, bo.block = &block->flow_block; + down_write(&block->cb_lock); cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); tcf_block_setup(block, &bo); + up_write(&block->cb_lock); } static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) @@ -661,6 +663,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); if (!dev->netdev_ops->ndo_setup_tc) goto no_offload_dev_inc; @@ -669,24 +672,31 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, */ if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) { NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_unlock; } err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; if (err) - return err; + goto err_unlock; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); + up_write(&block->cb_lock); return 0; no_offload_dev_inc: - if 
(tcf_block_offload_in_use(block)) - return -EOPNOTSUPP; + if (tcf_block_offload_in_use(block)) { + err = -EOPNOTSUPP; + goto err_unlock; + } + err = 0; block->nooffloaddevcnt++; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); - return 0; +err_unlock: + up_write(&block->cb_lock); + return err; } static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, @@ -695,6 +705,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if (!dev->netdev_ops->ndo_setup_tc) @@ -702,10 +713,12 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if (err == -EOPNOTSUPP) goto no_offload_dev_dec; + up_write(&block->cb_lock); return; no_offload_dev_dec: WARN_ON(block->nooffloaddevcnt-- == 0); + up_write(&block->cb_lock); } static int @@ -820,6 +833,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return ERR_PTR(-ENOMEM); } mutex_init(&block->lock); + init_rwsem(&block->cb_lock); flow_block_init(&block->flow_block); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->owner_list); @@ -1355,6 +1369,8 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, struct tcf_proto *tp, *tp_prev; int err; + lockdep_assert_held(&block->cb_lock); + for (chain = __tcf_get_next_chain(block, NULL); chain; chain_prev = chain, @@ -1393,6 +1409,8 @@ static int tcf_block_bind(struct tcf_block *block, struct flow_block_cb *block_cb, *next; int err, i = 0; + lockdep_assert_held(&block->cb_lock); + list_for_each_entry(block_cb, &bo->cb_list, list) { err = tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, true, @@ -1427,6 +1445,8 @@ static void tcf_block_unbind(struct tcf_block *block, { struct flow_block_cb *block_cb, *next; + 
lockdep_assert_held(&block->cb_lock); + list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, false, @@ -2987,19 +3007,26 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, int ok_count = 0; int err; + down_read(&block->cb_lock); /* Make sure all netdevs sharing this block are offload-capable. */ - if (block->nooffloaddevcnt && err_stop) - return -EOPNOTSUPP; + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { - if (err_stop) - return err; + if (err_stop) { + ok_count = err; + goto err_unlock; + } } else { ok_count++; } } +err_unlock: + up_read(&block->cb_lock); return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); -- cgit v1.2.3 From 97394bef5622cb32fd1e5d152251090da6c238b9 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 26 Aug 2019 16:44:58 +0300 Subject: net: sched: change tcf block offload counter type to atomic_t As a preparation for running proto ops functions without rtnl lock, change offload counter type to atomic. This is necessary to allow updating the counter by multiple concurrent users when offloading filters to hardware from unlocked classifiers. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/sch_generic.h | 7 ++++--- net/sched/cls_api.c | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a3eaf5f9d28f..d778c502decd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -401,7 +402,7 @@ struct tcf_block { struct flow_block flow_block; struct list_head owner_list; bool keep_dst; - unsigned int offloadcnt; /* Number of oddloaded filters */ + atomic_t offloadcnt; /* Number of oddloaded filters */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ struct { struct tcf_chain *chain; @@ -443,7 +444,7 @@ static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) if (*flags & TCA_CLS_FLAGS_IN_HW) return; *flags |= TCA_CLS_FLAGS_IN_HW; - block->offloadcnt++; + atomic_inc(&block->offloadcnt); } static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) @@ -451,7 +452,7 @@ static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) if (!(*flags & TCA_CLS_FLAGS_IN_HW)) return; *flags &= ~TCA_CLS_FLAGS_IN_HW; - block->offloadcnt--; + atomic_dec(&block->offloadcnt); } static inline void diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 959b7ca1ca02..f2c2f8159e35 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -629,7 +629,7 @@ static void tc_indr_block_call(struct tcf_block *block, static bool tcf_block_offload_in_use(struct tcf_block *block) { - return block->offloadcnt; + return atomic_read(&block->offloadcnt); } static int tcf_block_offload_cmd(struct tcf_block *block, -- cgit v1.2.3 From 401192113730947572d280ec465555ab9ff5a597 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 26 Aug 2019 16:44:59 +0300 Subject: net: sched: refactor block offloads counter usage Without rtnl lock protection filters can no longer safely manage block 
offloads counter themselves. Refactor cls API to protect block offloadcnt with tcf_block->cb_lock that is already used to protect driver callback list and nooffloaddevcnt counter. The counter can be modified by concurrent tasks by new functions that execute block callbacks (which is safe with previous patch that changed its type to atomic_t), however, block bind/unbind code that checks the counter value takes cb_lock in write mode to exclude any concurrent modifications. This approach prevents race conditions between bind/unbind and callback execution code but allows for concurrency for tc rule update path. Move block offload counter, filter in hardware counter and filter flags management from classifiers into cls hardware offloads API. Make functions tcf_block_offload_{inc|dec}() and tc_cls_offload_cnt_update() to be cls API private. Implement following new cls API to be used instead: tc_setup_cb_add() - non-destructive filter add. If filter that wasn't already in hardware is successfully offloaded, increment block offloads counter, set filter in hardware counter and flag. On failure, previously offloaded filter is considered to be intact and offloads counter is not decremented. tc_setup_cb_replace() - destructive filter replace. Release existing filter block offload counter and reset its in hardware counter and flag. Set new filter in hardware counter and flag. On failure, previously offloaded filter is considered to be destroyed and offload counter is decremented. tc_setup_cb_destroy() - filter destroy. Unconditionally decrement block offloads counter. tc_setup_cb_reoffload() - reoffload filter to single cb. Execute cb() and call tc_cls_offload_cnt_update() if cb() didn't return an error. Refactor all offload-capable classifiers to atomically offload filters to hardware, change block offload counter, and set filter in hardware counter and flag by means of the new cls API functions. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 17 ++++- include/net/sch_generic.h | 31 -------- net/sched/cls_api.c | 176 ++++++++++++++++++++++++++++++++++++++++++---- net/sched/cls_bpf.c | 38 +++++----- net/sched/cls_flower.c | 38 ++++------ net/sched/cls_matchall.c | 27 +++---- net/sched/cls_u32.c | 29 ++++---- 7 files changed, 233 insertions(+), 123 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 64999ffcb486..612232492f67 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -506,7 +506,22 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, - void *type_data, bool err_stop); + void *type_data, bool err_stop, bool rtnl_held); +int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held); +int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *old_flags, unsigned int *old_in_hw_count, + u32 *new_flags, unsigned int *new_in_hw_count, + bool rtnl_held); +int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held); +int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, + bool add, flow_setup_cb_t *cb, + enum tc_setup_type type, void *type_data, + void *cb_priv, u32 *flags, unsigned int *in_hw_count); unsigned int tcf_exts_num_actions(struct tcf_exts *exts); struct tc_cls_u32_knode { diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d778c502decd..f90e3b2a3065 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -439,37 +439,6 @@ static inline bool 
lockdep_tcf_proto_is_locked(struct tcf_proto *tp) #define tcf_proto_dereference(p, tp) \ rcu_dereference_protected(p, lockdep_tcf_proto_is_locked(tp)) -static inline void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) -{ - if (*flags & TCA_CLS_FLAGS_IN_HW) - return; - *flags |= TCA_CLS_FLAGS_IN_HW; - atomic_inc(&block->offloadcnt); -} - -static inline void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) -{ - if (!(*flags & TCA_CLS_FLAGS_IN_HW)) - return; - *flags &= ~TCA_CLS_FLAGS_IN_HW; - atomic_dec(&block->offloadcnt); -} - -static inline void -tc_cls_offload_cnt_update(struct tcf_block *block, u32 *cnt, - u32 *flags, bool add) -{ - if (add) { - if (!*cnt) - tcf_block_offload_inc(block, flags); - (*cnt)++; - } else { - (*cnt)--; - if (!*cnt) - tcf_block_offload_dec(block, flags); - } -} - static inline void qdisc_cb_private_validate(const struct sk_buff *skb, int sz) { struct qdisc_skb_cb *qcb; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index f2c2f8159e35..6e612984e4a6 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3000,37 +3000,185 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); -int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, - void *type_data, bool err_stop) +static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) +{ + if (*flags & TCA_CLS_FLAGS_IN_HW) + return; + *flags |= TCA_CLS_FLAGS_IN_HW; + atomic_inc(&block->offloadcnt); +} + +static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) +{ + if (!(*flags & TCA_CLS_FLAGS_IN_HW)) + return; + *flags &= ~TCA_CLS_FLAGS_IN_HW; + atomic_dec(&block->offloadcnt); +} + +static void tc_cls_offload_cnt_update(struct tcf_block *block, + struct tcf_proto *tp, u32 *cnt, + u32 *flags, u32 diff, bool add) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + if (add) { + if (!*cnt) + tcf_block_offload_inc(block, flags); + *cnt += diff; + } else { 
+ *cnt -= diff; + if (!*cnt) + tcf_block_offload_dec(block, flags); + } + spin_unlock(&tp->lock); +} + +static void +tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp, + u32 *cnt, u32 *flags) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + tcf_block_offload_dec(block, flags); + *cnt = 0; + spin_unlock(&tp->lock); +} + +static int +__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop) { struct flow_block_cb *block_cb; int ok_count = 0; int err; - down_read(&block->cb_lock); - /* Make sure all netdevs sharing this block are offload-capable. */ - if (block->nooffloaddevcnt && err_stop) { - ok_count = -EOPNOTSUPP; - goto err_unlock; - } - list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { - if (err_stop) { - ok_count = err; - goto err_unlock; - } + if (err_stop) + return err; } else { ok_count++; } } -err_unlock: + return ok_count; +} + +int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop, bool rtnl_held) +{ + int ok_count; + + down_read(&block->cb_lock); + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); up_read(&block->cb_lock); return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); +/* Non-destructive filter add. If filter that wasn't already in hardware is + * successfully offloaded, increment block offloads counter. On failure, + * previously offloaded filter is considered to be intact and offloads counter + * is not decremented. + */ + +int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + int ok_count; + + down_read(&block->cb_lock); + /* Make sure all netdevs sharing this block are offload-capable. 
*/ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, + ok_count, true); +err_unlock: + up_read(&block->cb_lock); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_add); + +/* Destructive filter replace. If filter that wasn't already in hardware is + * successfully offloaded, increment block offload counter. On failure, + * previously offloaded filter is considered to be destroyed and offload counter + * is decremented. + */ + +int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *old_flags, unsigned int *old_in_hw_count, + u32 *new_flags, unsigned int *new_in_hw_count, + bool rtnl_held) +{ + int ok_count; + + down_read(&block->cb_lock); + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags); + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, new_in_hw_count, new_flags, + ok_count, true); +err_unlock: + up_read(&block->cb_lock); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_replace); + +/* Destroy filter and decrement block offload counter, if filter was previously + * offloaded. + */ + +int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + int ok_count; + + down_read(&block->cb_lock); + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + + tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); + up_read(&block->cb_lock); + return ok_count < 0 ? 
ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_destroy); + +int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, + bool add, flow_setup_cb_t *cb, + enum tc_setup_type type, void *type_data, + void *cb_priv, u32 *flags, unsigned int *in_hw_count) +{ + int err = cb(type, type_data, cb_priv); + + if (err) { + if (add && tc_skip_sw(*flags)) + return err; + } else { + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1, + add); + } + + return 0; +} +EXPORT_SYMBOL(tc_setup_cb_reoffload); + int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts) { diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 3f7a9c02b70c..bf10bdaf5012 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -163,17 +163,19 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, cls_bpf.exts_integrated = obj->exts_integrated; if (oldprog) - tcf_block_offload_dec(block, &oldprog->gen_flags); + err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf, + skip_sw, &oldprog->gen_flags, + &oldprog->in_hw_count, + &prog->gen_flags, &prog->in_hw_count, + true); + else + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf, + skip_sw, &prog->gen_flags, + &prog->in_hw_count, true); - err = tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); - if (prog) { - if (err < 0) { - cls_bpf_offload_cmd(tp, oldprog, prog, extack); - return err; - } else if (err > 0) { - prog->in_hw_count = err; - tcf_block_offload_inc(block, &prog->gen_flags); - } + if (prog && err) { + cls_bpf_offload_cmd(tp, oldprog, prog, extack); + return err; } if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) @@ -230,7 +232,7 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp, cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false); + tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false, true); } static int 
cls_bpf_init(struct tcf_proto *tp) @@ -673,15 +675,11 @@ static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv); - if (err) { - if (add && tc_skip_sw(prog->gen_flags)) - return err; - continue; - } - - tc_cls_offload_cnt_update(block, &prog->in_hw_count, - &prog->gen_flags, add); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSBPF, + &cls_bpf, cb_priv, &prog->gen_flags, + &prog->in_hw_count); + if (err) + return err; } return 0; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 054123742e32..cb816bbbd376 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -419,10 +419,10 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, cls_flower.command = FLOW_CLS_DESTROY; cls_flower.cookie = (unsigned long) f; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false, + &f->flags, &f->in_hw_count, true); spin_lock(&tp->lock); list_del_init(&f->hw_list); - tcf_block_offload_dec(block, &f->flags); spin_unlock(&tp->lock); if (!rtnl_held) @@ -466,18 +466,13 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, goto errout; } - err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, + skip_sw, &f->flags, &f->in_hw_count, true); kfree(cls_flower.rule); - if (err < 0) { + if (err) { fl_hw_destroy_filter(tp, f, true, NULL); goto errout; - } else if (err > 0) { - f->in_hw_count = err; - err = 0; - spin_lock(&tp->lock); - tcf_block_offload_inc(block, &f->flags); - spin_unlock(&tp->lock); } if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) { @@ -509,7 +504,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, cls_flower.cookie = (unsigned long) f; cls_flower.classid 
= f->res.classid; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, cls_flower.stats.pkts, @@ -1844,21 +1839,16 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.classid = f->res.classid; - err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + err = tc_setup_cb_reoffload(block, tp, add, cb, + TC_SETUP_CLSFLOWER, &cls_flower, + cb_priv, &f->flags, + &f->in_hw_count); kfree(cls_flower.rule); if (err) { - if (add && tc_skip_sw(f->flags)) { - __fl_put(f); - return err; - } - goto next_flow; + __fl_put(f); + return err; } - - spin_lock(&tp->lock); - tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags, - add); - spin_unlock(&tp->lock); next_flow: __fl_put(f); } @@ -1886,7 +1876,7 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain, /* We don't care if driver (any of them) fails to handle this * call. It serves just as a hint for it. 
*/ - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); kfree(cls_flower.rule); return 0; @@ -1902,7 +1892,7 @@ static void fl_hw_destroy_tmplt(struct tcf_chain *chain, cls_flower.command = FLOW_CLS_TMPLT_DESTROY; cls_flower.cookie = (unsigned long) tmplt; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); } static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 455ea2793f9b..911d1ea28bb2 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -75,8 +75,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_DESTROY; cls_mall.cookie = cookie; - tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false); - tcf_block_offload_dec(block, &head->flags); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, false, + &head->flags, &head->in_hw_count, true); } static int mall_replace_hw_filter(struct tcf_proto *tp, @@ -109,15 +109,13 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return err; } - err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, + skip_sw, &head->flags, &head->in_hw_count, true); kfree(cls_mall.rule); - if (err < 0) { + if (err) { mall_destroy_hw_filter(tp, head, cookie, NULL); return err; - } else if (err > 0) { - head->in_hw_count = err; - tcf_block_offload_inc(block, &head->flags); } if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW)) @@ -312,16 +310,13 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, return 0; } - err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL, + &cls_mall, cb_priv, &head->flags, + 
&head->in_hw_count); kfree(cls_mall.rule); - if (err) { - if (add && tc_skip_sw(head->flags)) - return err; - return 0; - } - - tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add); + if (err) + return err; return 0; } @@ -337,7 +332,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_STATS; cls_mall.cookie = cookie; - tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false); + tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true); tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes, cls_mall.stats.pkts, cls_mall.stats.lastused); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 8614088edd1b..a0e6fac613de 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -480,7 +480,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); + tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, @@ -498,7 +498,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); + err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true); if (err < 0) { u32_clear_hw_hnode(tp, h, NULL); return err; @@ -522,8 +522,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; - tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); - tcf_block_offload_dec(block, &n->flags); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false, + &n->flags, &n->in_hw_count, true); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, @@ -552,13 +552,11 @@ static int 
u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, if (n->ht_down) cls_u32.knode.link_handle = ht->handle; - err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); - if (err < 0) { + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw, + &n->flags, &n->in_hw_count, true); + if (err) { u32_remove_hw_knode(tp, n, NULL); return err; - } else if (err > 0) { - n->in_hw_count = err; - tcf_block_offload_inc(block, &n->flags); } if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) @@ -1201,14 +1199,11 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.knode.link_handle = ht->handle; } - err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv); - if (err) { - if (add && tc_skip_sw(n->flags)) - return err; - return 0; - } - - tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32, + &cls_u32, cb_priv, &n->flags, + &n->in_hw_count); + if (err) + return err; return 0; } -- cgit v1.2.3 From a449a3e77a85fc8b31fef7238451dc87af8ff1af Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 26 Aug 2019 16:45:00 +0300 Subject: net: sched: notify classifier on successful offload add/delete To remove dependency on rtnl lock, extend classifier ops with new ops->hw_add() and ops->hw_del() callbacks. Call them from cls API while holding cb_lock every time filter is successfully added to or deleted from hardware. Implement the new API in flower classifier. Use it to manage hw_filters list under cb_lock protection, instead of relying on rtnl lock to synchronize with concurrent fl_reoffload() call. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S.
Miller --- include/net/sch_generic.h | 4 ++++ net/sched/cls_api.c | 19 +++++++++++++++++-- net/sched/cls_flower.c | 33 ++++++++++++++++++++++++++------- 3 files changed, 47 insertions(+), 9 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f90e3b2a3065..c4fbbaff30a2 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -312,6 +312,10 @@ struct tcf_proto_ops { int (*reoffload)(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, void *cb_priv, struct netlink_ext_ack *extack); + void (*hw_add)(struct tcf_proto *tp, + void *type_data); + void (*hw_del)(struct tcf_proto *tp, + void *type_data); void (*bind_class)(void *, u32, unsigned long); void * (*tmplt_create)(struct net *net, struct tcf_chain *chain, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 6e612984e4a6..8b807e75fae2 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3099,6 +3099,11 @@ int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, } ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); if (ok_count > 0) tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, ok_count, true); @@ -3130,11 +3135,18 @@ int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, } tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); if (ok_count > 0) - tc_cls_offload_cnt_update(block, tp, new_in_hw_count, new_flags, - ok_count, true); + tc_cls_offload_cnt_update(block, tp, new_in_hw_count, + new_flags, ok_count, true); err_unlock: up_read(&block->cb_lock); return ok_count < 0 ? 
ok_count : 0; @@ -3155,6 +3167,9 @@ int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); + up_read(&block->cb_lock); return ok_count < 0 ? ok_count : 0; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index cb816bbbd376..5cb694469b51 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -421,9 +421,6 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false, &f->flags, &f->in_hw_count, true); - spin_lock(&tp->lock); - list_del_init(&f->hw_list); - spin_unlock(&tp->lock); if (!rtnl_held) rtnl_unlock(); @@ -433,7 +430,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = fl_head_dereference(tp); struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; bool skip_sw = tc_skip_sw(f->flags); @@ -480,9 +476,6 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, goto errout; } - spin_lock(&tp->lock); - list_add(&f->hw_list, &head->hw_filters); - spin_unlock(&tp->lock); errout: if (!rtnl_held) rtnl_unlock(); @@ -1856,6 +1849,30 @@ next_flow: return 0; } +static void fl_hw_add(struct tcf_proto *tp, void *type_data) +{ + struct flow_cls_offload *cls_flower = type_data; + struct cls_fl_filter *f = + (struct cls_fl_filter *) cls_flower->cookie; + struct cls_fl_head *head = fl_head_dereference(tp); + + spin_lock(&tp->lock); + list_add(&f->hw_list, &head->hw_filters); + spin_unlock(&tp->lock); +} + +static void fl_hw_del(struct tcf_proto *tp, void *type_data) +{ + struct flow_cls_offload *cls_flower = type_data; + struct cls_fl_filter *f = + (struct cls_fl_filter *) cls_flower->cookie; + + spin_lock(&tp->lock); + if 
(!list_empty(&f->hw_list)) + list_del_init(&f->hw_list); + spin_unlock(&tp->lock); +} + static int fl_hw_create_tmplt(struct tcf_chain *chain, struct fl_flow_tmplt *tmplt) { @@ -2516,6 +2533,8 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .delete = fl_delete, .walk = fl_walk, .reoffload = fl_reoffload, + .hw_add = fl_hw_add, + .hw_del = fl_hw_del, .dump = fl_dump, .bind_class = fl_bind_class, .tmplt_create = fl_tmplt_create, -- cgit v1.2.3 From c9f14470d04830de217f9d28fcd0deffd7e8c0b1 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 26 Aug 2019 16:45:01 +0300 Subject: net: sched: add API for registering unlocked offload block callbacks Extend struct flow_block_offload with "unlocked_driver_cb" flag to allow registering and unregistering block hardware offload callbacks that do not require caller to hold rtnl lock. Extend tcf_block with additional lockeddevcnt counter that is incremented for each non-unlocked driver callback attached to device. This counter is necessary to conditionally obtain rtnl lock before calling hardware callbacks in following patches. Register mlx5 tc block offload callbacks as "unlocked". Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 ++ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 +++ include/net/flow_offload.h | 1 + include/net/sch_generic.h | 1 + net/sched/cls_api.c | 6 ++++++ 5 files changed, 13 insertions(+) (limited to 'net/sched/cls_api.c') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index fa4bf2d4bcd4..8592b98d0e70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3470,10 +3470,12 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + struct flow_block_offload *f = type_data; switch (type) { #ifdef CONFIG_MLX5_ESWITCH case TC_SETUP_BLOCK: + f->unlocked_driver_cb = true; return flow_block_cb_setup_simple(type_data, &mlx5e_block_cb_list, mlx5e_setup_tc_block_cb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 3c0d36b2b91c..e7ac6233037d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -763,6 +763,7 @@ mlx5e_rep_indr_setup_tc_block(struct net_device *netdev, if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) return -EOPNOTSUPP; + f->unlocked_driver_cb = true; f->driver_block_list = &mlx5e_block_cb_list; switch (f->command) { @@ -1245,9 +1246,11 @@ static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { struct mlx5e_priv *priv = netdev_priv(dev); + struct flow_block_offload *f = type_data; switch (type) { case TC_SETUP_BLOCK: + f->unlocked_driver_cb = true; return flow_block_cb_setup_simple(type_data, &mlx5e_rep_block_cb_list, mlx5e_rep_setup_tc_cb, diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index 757fa84de654..fc881875f856 100644 --- a/include/net/flow_offload.h +++ 
b/include/net/flow_offload.h @@ -284,6 +284,7 @@ struct flow_block_offload { enum flow_block_command command; enum flow_block_binder_type binder_type; bool block_shared; + bool unlocked_driver_cb; struct net *net; struct flow_block *block; struct list_head cb_list; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c4fbbaff30a2..43f5b7ed02bd 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -408,6 +408,7 @@ struct tcf_block { bool keep_dst; atomic_t offloadcnt; /* Number of oddloaded filters */ unsigned int nooffloaddevcnt; /* Number of devs unable to do offload */ + unsigned int lockeddevcnt; /* Number of devs that require rtnl lock. */ struct { struct tcf_chain *chain; struct list_head filter_chain_list; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8b807e75fae2..1a39779bdbad 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1418,6 +1418,8 @@ static int tcf_block_bind(struct tcf_block *block, bo->extack); if (err) goto err_unroll; + if (!bo->unlocked_driver_cb) + block->lockeddevcnt++; i++; } @@ -1433,6 +1435,8 @@ err_unroll: block_cb->cb_priv, false, tcf_block_offload_in_use(block), NULL); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } flow_block_cb_free(block_cb); } @@ -1454,6 +1458,8 @@ static void tcf_block_unbind(struct tcf_block *block, NULL); list_del(&block_cb->list); flow_block_cb_free(block_cb); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } } -- cgit v1.2.3 From 11bd634da25735a3f2f12112d02661d462a76792 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 26 Aug 2019 16:45:02 +0300 Subject: net: sched: conditionally obtain rtnl lock in cls hw offloads API In order to remove dependency on rtnl lock from offloads code of classifiers, take rtnl lock conditionally before executing driver callbacks. Only obtain rtnl lock if block is bound to devices that require it. Block bind/unbind code is rtnl-locked and obtains block->cb_lock while holding rtnl lock. 
Obtain locks in same order in tc_setup_cb_*() functions to prevent deadlock. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- net/sched/cls_api.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'net/sched/cls_api.c') diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 1a39779bdbad..3c103cf9fd0d 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3076,11 +3076,28 @@ __tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held) { + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; +retry: + if (take_rtnl) + rtnl_lock(); down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); return ok_count; } EXPORT_SYMBOL(tc_setup_cb_call); @@ -3095,9 +3112,23 @@ int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, enum tc_setup_type type, void *type_data, bool err_stop, u32 *flags, unsigned int *in_hw_count, bool rtnl_held) { + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; +retry: + if (take_rtnl) + rtnl_lock(); down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. 
+ */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + /* Make sure all netdevs sharing this block are offload-capable. */ if (block->nooffloaddevcnt && err_stop) { ok_count = -EOPNOTSUPP; @@ -3115,6 +3146,8 @@ int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, ok_count, true); err_unlock: up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); return ok_count < 0 ? ok_count : 0; } EXPORT_SYMBOL(tc_setup_cb_add); @@ -3131,9 +3164,23 @@ int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, u32 *new_flags, unsigned int *new_in_hw_count, bool rtnl_held) { + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; +retry: + if (take_rtnl) + rtnl_lock(); down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + /* Make sure all netdevs sharing this block are offload-capable. */ if (block->nooffloaddevcnt && err_stop) { ok_count = -EOPNOTSUPP; @@ -3155,6 +3202,8 @@ int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, new_flags, ok_count, true); err_unlock: up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); return ok_count < 0 ? ok_count : 0; } EXPORT_SYMBOL(tc_setup_cb_replace); @@ -3167,9 +3216,23 @@ int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, enum tc_setup_type type, void *type_data, bool err_stop, u32 *flags, unsigned int *in_hw_count, bool rtnl_held) { + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; int ok_count; +retry: + if (take_rtnl) + rtnl_lock(); down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. 
+ * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); @@ -3177,6 +3240,8 @@ int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, tp->ops->hw_del(tp, type_data); up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); return ok_count < 0 ? ok_count : 0; } EXPORT_SYMBOL(tc_setup_cb_destroy); -- cgit v1.2.3 From 9838b20a7fb28c69fa66ac8e68d967ffe1d0ecad Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 26 Aug 2019 16:45:03 +0300 Subject: net: sched: take rtnl lock in tc_setup_flow_action() In order to allow using new flow_action infrastructure from unlocked classifiers, modify tc_setup_flow_action() to accept new 'rtnl_held' argument. Take rtnl lock before accessing tc_action data. This is necessary to protect from concurrent action replace. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/net/pkt_cls.h | 2 +- net/sched/cls_api.c | 17 +++++++++++++---- net/sched/cls_flower.c | 6 ++++-- net/sched/cls_matchall.c | 4 ++-- 4 files changed, 20 insertions(+), 9 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 612232492f67..a48824bc1489 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -504,7 +504,7 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) } int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts); + const struct tcf_exts *exts, bool rtnl_held); int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 3c103cf9fd0d..8751bb8a682f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3266,14 +3266,17 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, EXPORT_SYMBOL(tc_setup_cb_reoffload); int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts) + const struct tcf_exts *exts, bool rtnl_held) { const struct tc_action *act; - int i, j, k; + int i, j, k, err = 0; if (!exts) return 0; + if (!rtnl_held) + rtnl_lock(); + j = 0; tcf_exts_for_each_action(i, act, exts) { struct flow_action_entry *entry; @@ -3318,6 +3321,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->vlan.prio = tcf_vlan_push_prio(act); break; default: + err = -EOPNOTSUPP; goto err_out; } } else if (is_tcf_tunnel_set(act)) { @@ -3335,6 +3339,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_ADD; break; default: + err = -EOPNOTSUPP; goto err_out; } entry->mangle.htype = tcf_pedit_htype(act, k); @@ -3393,15 +3398,19 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_PTYPE; entry->ptype = tcf_skbedit_ptype(act); } else { + 
err = -EOPNOTSUPP; goto err_out; } if (!is_tcf_pedit(act)) j++; } - return 0; + err_out: - return -EOPNOTSUPP; + if (!rtnl_held) + rtnl_unlock(); + + return err; } EXPORT_SYMBOL(tc_setup_flow_action); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5cb694469b51..fb305bd45d93 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -452,7 +452,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, + true); if (err) { kfree(cls_flower.rule); if (skip_sw) @@ -1819,7 +1820,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, + true); if (err) { kfree(cls_flower.rule); if (tc_skip_sw(f->flags)) { diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 911d1ea28bb2..3266f25011cc 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -97,7 +97,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.cookie = cookie; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); if (err) { kfree(cls_mall.rule); mall_destroy_hw_filter(tp, head, cookie, NULL); @@ -300,7 +300,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; cls_mall.cookie = (unsigned long)head; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); if (err) { kfree(cls_mall.rule); if (add && 
tc_skip_sw(head->flags)) { -- cgit v1.2.3 From 5a6ff4b13d598573fc954f672cd2a267b76a01ec Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 26 Aug 2019 16:45:04 +0300 Subject: net: sched: take reference to action dev before calling offloads In order to remove dependency on rtnl lock when calling hardware offload API, take reference to action mirred dev when initializing flow_action structure in tc_setup_flow_action(). Implement function tc_cleanup_flow_action(), use it to release the device after hardware offload API is done using it. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 2 ++ net/sched/cls_api.c | 32 ++++++++++++++++++++++++++++++++ net/sched/cls_flower.c | 2 ++ 3 files changed, 36 insertions(+) (limited to 'net/sched/cls_api.c') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index a48824bc1489..e553fc80eb23 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -505,6 +505,8 @@ tcf_match_indev(struct sk_buff *skb, int ifindex) int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts, bool rtnl_held); +void tc_cleanup_flow_action(struct flow_action *flow_action); + int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, void *type_data, bool err_stop, bool rtnl_held); int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8751bb8a682f..d988737693e4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3265,6 +3265,27 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, } EXPORT_SYMBOL(tc_setup_cb_reoffload); +void tc_cleanup_flow_action(struct flow_action *flow_action) +{ + struct flow_action_entry *entry; + int i; + + flow_action_for_each(i, entry, flow_action) { + switch (entry->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_MIRRED: + case FLOW_ACTION_REDIRECT_INGRESS: + case FLOW_ACTION_MIRRED_INGRESS: + if 
(entry->dev) + dev_put(entry->dev); + break; + default: + break; + } + } +} +EXPORT_SYMBOL(tc_cleanup_flow_action); + int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts, bool rtnl_held) { @@ -3294,15 +3315,23 @@ int tc_setup_flow_action(struct flow_action *flow_action, } else if (is_tcf_mirred_egress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT; entry->dev = tcf_mirred_dev(act); + if (entry->dev) + dev_hold(entry->dev); } else if (is_tcf_mirred_egress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED; entry->dev = tcf_mirred_dev(act); + if (entry->dev) + dev_hold(entry->dev); } else if (is_tcf_mirred_ingress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT_INGRESS; entry->dev = tcf_mirred_dev(act); + if (entry->dev) + dev_hold(entry->dev); } else if (is_tcf_mirred_ingress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED_INGRESS; entry->dev = tcf_mirred_dev(act); + if (entry->dev) + dev_hold(entry->dev); } else if (is_tcf_vlan(act)) { switch (tcf_vlan_action(act)) { case TCA_VLAN_ACT_PUSH: @@ -3410,6 +3439,9 @@ err_out: if (!rtnl_held) rtnl_unlock(); + if (err) + tc_cleanup_flow_action(flow_action); + return err; } EXPORT_SYMBOL(tc_setup_flow_action); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index fb305bd45d93..2852fe6f50d2 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -465,6 +465,7 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw, &f->flags, &f->in_hw_count, true); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { @@ -1838,6 +1839,7 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_SETUP_CLSFLOWER, &cls_flower, cb_priv, &f->flags, &f->in_hw_count); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { -- cgit v1.2.3 From 1444c175a37443d3f6d3db825df050741452c3c3 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: 
Mon, 26 Aug 2019 16:45:05 +0300 Subject: net: sched: copy tunnel info when setting flow_action entry->tunnel In order to remove dependency on rtnl lock, modify tc_setup_flow_action() to copy tunnel info, instead of just saving pointer to tunnel_key action tunnel info. This is necessary to prevent concurrent action overwrite from releasing tunnel info while it is being used by rtnl-unlocked driver. Implement helper tcf_tunnel_info_copy() that is used to copy tunnel info with all its options to dynamically allocated memory block. Modify tc_cleanup_flow_action() to free dynamically allocated tunnel info. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/tc_act/tc_tunnel_key.h | 17 +++++++++++++++++ net/sched/cls_api.c | 9 ++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'net/sched/cls_api.c') diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h index 7c3f777c168c..0689d9bcdf84 100644 --- a/include/net/tc_act/tc_tunnel_key.h +++ b/include/net/tc_act/tc_tunnel_key.h @@ -59,4 +59,21 @@ static inline struct ip_tunnel_info *tcf_tunnel_info(const struct tc_action *a) return NULL; #endif } + +static inline struct ip_tunnel_info * +tcf_tunnel_info_copy(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + struct ip_tunnel_info *tun = tcf_tunnel_info(a); + + if (tun) { + size_t tun_size = sizeof(*tun) + tun->options_len; + struct ip_tunnel_info *tun_copy = kmemdup(tun, tun_size, + GFP_KERNEL); + + return tun_copy; + } +#endif + return NULL; +} #endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index d988737693e4..671ca905dbb5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3279,6 +3279,9 @@ void tc_cleanup_flow_action(struct flow_action *flow_action) if (entry->dev) dev_put(entry->dev); break; + case FLOW_ACTION_TUNNEL_ENCAP: + kfree(entry->tunnel); + break; default: break; } @@ -3355,7 +3358,11 @@ int 
tc_setup_flow_action(struct flow_action *flow_action, } } else if (is_tcf_tunnel_set(act)) { entry->id = FLOW_ACTION_TUNNEL_ENCAP; - entry->tunnel = tcf_tunnel_info(act); + entry->tunnel = tcf_tunnel_info_copy(act); + if (!entry->tunnel) { + err = -ENOMEM; + goto err_out; + } } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else if (is_tcf_pedit(act)) { -- cgit v1.2.3 From 95a7233c452a58a4c2310c456c73997853b2ec46 Mon Sep 17 00:00:00 2001 From: Paul Blakey Date: Wed, 4 Sep 2019 16:56:37 +0300 Subject: net: openvswitch: Set OvS recirc_id from tc chain index Offloaded OvS datapath rules are translated one to one to tc rules, for example the following simplified OvS rule: recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2) Will be translated to the following tc rule: $ tc filter add dev dev1 ingress \ prio 1 chain 0 proto ip \ flower tcp ct_state -trk \ action ct pipe \ action goto chain 2 Received packets will first travel through tc, and if they aren't stolen by it, like in the above rule, they will continue to OvS datapath. Since we already did some actions (action ct in this case) which might modify the packets, and updated action stats, we would like to continue the processing with the correct recirc_id in OvS (here recirc_id(2)) where we left off. To support this, introduce a new skb extension for tc, which will be used for translating tc chain to ovs recirc_id to handle these miss cases. Last tc chain index will be set by tc goto chain action and read by OvS datapath. Signed-off-by: Paul Blakey Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Acked-by: Pravin B Shelar Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 13 +++++++++++++ include/uapi/linux/openvswitch.h | 3 +++ net/core/skbuff.c | 6 ++++++ net/openvswitch/datapath.c | 38 +++++++++++++++++++++++++++++++++----- net/openvswitch/datapath.h | 2 ++ net/openvswitch/flow.c | 13 +++++++++++++ net/sched/Kconfig | 13 +++++++++++++ net/sched/cls_api.c | 12 ++++++++++++ 8 files changed, 95 insertions(+), 5 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 77c6dc88e95d..028e684fa974 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -279,6 +279,16 @@ struct nf_bridge_info { }; #endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +/* Chain in tc_skb_ext will be used to share the tc chain with + * ovs recirc_id. It will be set to the current chain by tc + * and read by ovs to recirc_id. + */ +struct tc_skb_ext { + __u32 chain; +}; +#endif + struct sk_buff_head { /* These two members must be first. */ struct sk_buff *next; @@ -4057,6 +4067,9 @@ enum skb_ext_id { #endif #ifdef CONFIG_XFRM SKB_EXT_SEC_PATH, +#endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + TC_SKB_EXT, #endif SKB_EXT_NUM, /* must be last */ }; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index f271f1ec50ae..1887a451c388 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -123,6 +123,9 @@ struct ovs_vport_stats { /* Allow datapath to associate multiple Netlink PIDs to each vport */ #define OVS_DP_F_VPORT_PIDS (1 << 1) +/* Allow tc offload recirc sharing */ +#define OVS_DP_F_TC_RECIRC_SHARING (1 << 2) + /* Fixed logical ports. 
*/ #define OVSP_LOCAL ((__u32)0) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ea8e8d332d85..2b40b5a9425b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4087,6 +4087,9 @@ static const u8 skb_ext_type_len[] = { #ifdef CONFIG_XFRM [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path), #endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext), +#endif }; static __always_inline unsigned int skb_ext_total_length(void) @@ -4097,6 +4100,9 @@ static __always_inline unsigned int skb_ext_total_length(void) #endif #ifdef CONFIG_XFRM skb_ext_type_len[SKB_EXT_SEC_PATH] + +#endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + skb_ext_type_len[TC_SKB_EXT] + #endif 0; } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 65122bbccd27..dde9d762edee 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1545,10 +1545,34 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in dp->user_features = 0; } -static void ovs_dp_change(struct datapath *dp, struct nlattr *a[]) +DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support); + +static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { - if (a[OVS_DP_ATTR_USER_FEATURES]) - dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + u32 user_features = 0; + + if (a[OVS_DP_ATTR_USER_FEATURES]) { + user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + + if (user_features & ~(OVS_DP_F_VPORT_PIDS | + OVS_DP_F_UNALIGNED | + OVS_DP_F_TC_RECIRC_SHARING)) + return -EOPNOTSUPP; + +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (user_features & OVS_DP_F_TC_RECIRC_SHARING) + return -EOPNOTSUPP; +#endif + } + + dp->user_features = user_features; + + if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) + static_branch_enable(&tc_recirc_sharing_support); + else + static_branch_disable(&tc_recirc_sharing_support); + + return 0; } static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) @@ 
-1610,7 +1634,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; - ovs_dp_change(dp, a); + err = ovs_dp_change(dp, a); + if (err) + goto err_destroy_meters; /* So far only local changes have been made, now need the lock. */ ovs_lock(); @@ -1736,7 +1762,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto err_unlock_free; - ovs_dp_change(dp, info->attrs); + err = ovs_dp_change(dp, info->attrs); + if (err) + goto err_unlock_free; err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_SET); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 751d34accdf9..81e85dde8217 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -218,6 +218,8 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex) extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; +DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support); + void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key); void ovs_dp_detach_port(struct vport *); int ovs_dp_upcall(struct datapath *, struct sk_buff *, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d81d2c7bf82..38147e6a20f5 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -842,6 +842,9 @@ static int key_extract_mac_proto(struct sk_buff *skb) int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + struct tc_skb_ext *tc_ext; +#endif int res, err; /* Extract metadata from packet. 
*/ @@ -874,7 +877,17 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, if (res < 0) return res; key->mac_proto = res; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (static_branch_unlikely(&tc_recirc_sharing_support)) { + tc_ext = skb_ext_find(skb, TC_SKB_EXT); + key->recirc_id = tc_ext ? tc_ext->chain : 0; + } else { + key->recirc_id = 0; + } +#else key->recirc_id = 0; +#endif err = key_extract(skb, key); if (!err) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index afd2ba157a13..b3faafeafab9 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -963,6 +963,19 @@ config NET_IFE_SKBTCINDEX tristate "Support to encoding decoding skb tcindex on IFE action" depends on NET_ACT_IFE +config NET_TC_SKB_EXT + bool "TC recirculation support" + depends on NET_CLS_ACT + default y if NET_CLS_ACT + select SKB_EXTENSIONS + + help + Say Y here to allow tc chain misses to continue in OvS datapath in + the correct recirc_id, and hardware chain misses to continue in + the correct chain in tc software datapath. + + Say N here if you won't be using tc<->ovs offload or tc chains offload. + endif # NET_SCHED config NET_SCH_FIFO diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 671ca905dbb5..05c4fe1c3ca2 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1514,6 +1514,18 @@ reclassify: goto reset; } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { first_tp = res->goto_tp; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + { + struct tc_skb_ext *ext; + + ext = skb_ext_add(skb, TC_SKB_EXT); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + + ext->chain = err & TC_ACT_EXT_VAL_MASK; + } +#endif goto reset; } #endif -- cgit v1.2.3 From 1158958a218bb55d1c358200d7f82808d11bf929 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 13 Sep 2019 18:28:39 +0300 Subject: net: sched: extend flow_action_entry with destructor Generalize flow_action_entry cleanup by extending the structure with pointer to destructor function. 
Set the destructor in tc_setup_flow_action(). Refactor tc_cleanup_flow_action() to call entry->destructor() instead of using switch that dispatches by entry->id and manually executes cleanup. This refactoring is necessary for following patches in this series that require destructor to use tc_action->ops callbacks that can't be easily obtained in tc_cleanup_flow_action(). Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/flow_offload.h | 6 +++- net/sched/cls_api.c | 77 +++++++++++++++++++++++++++------------------- 2 files changed, 50 insertions(+), 33 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index fc881875f856..86c567f531f3 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -154,8 +154,12 @@ enum flow_action_mangle_base { FLOW_ACT_MANGLE_HDR_TYPE_UDP, }; +typedef void (*action_destr)(void *priv); + struct flow_action_entry { enum flow_action_id id; + action_destr destructor; + void *destructor_priv; union { u32 chain_index; /* FLOW_ACTION_GOTO */ struct net_device *dev; /* FLOW_ACTION_REDIRECT */ @@ -170,7 +174,7 @@ struct flow_action_entry { u32 mask; u32 val; } mangle; - const struct ip_tunnel_info *tunnel; /* FLOW_ACTION_TUNNEL_ENCAP */ + struct ip_tunnel_info *tunnel; /* FLOW_ACTION_TUNNEL_ENCAP */ u32 csum_flags; /* FLOW_ACTION_CSUM */ u32 mark; /* FLOW_ACTION_MARK */ u16 ptype; /* FLOW_ACTION_PTYPE */ diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 05c4fe1c3ca2..c668195379bd 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3282,25 +3282,48 @@ void tc_cleanup_flow_action(struct flow_action *flow_action) struct flow_action_entry *entry; int i; - flow_action_for_each(i, entry, flow_action) { - switch (entry->id) { - case FLOW_ACTION_REDIRECT: - case FLOW_ACTION_MIRRED: - case FLOW_ACTION_REDIRECT_INGRESS: - case FLOW_ACTION_MIRRED_INGRESS: - if (entry->dev) - dev_put(entry->dev); - break; - 
case FLOW_ACTION_TUNNEL_ENCAP: - kfree(entry->tunnel); - break; - default: - break; - } - } + flow_action_for_each(i, entry, flow_action) + if (entry->destructor) + entry->destructor(entry->destructor_priv); } EXPORT_SYMBOL(tc_cleanup_flow_action); +static void tcf_mirred_put_dev(void *priv) +{ + struct net_device *dev = priv; + + dev_put(dev); +} + +static void tcf_mirred_get_dev(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->dev = tcf_mirred_dev(act); + if (!entry->dev) + return; + dev_hold(entry->dev); + entry->destructor = tcf_mirred_put_dev; + entry->destructor_priv = entry->dev; +} + +static void tcf_tunnel_encap_put_tunnel(void *priv) +{ + struct ip_tunnel_info *tunnel = priv; + + kfree(tunnel); +} + +static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->tunnel = tcf_tunnel_info_copy(act); + if (!entry->tunnel) + return -ENOMEM; + entry->destructor = tcf_tunnel_encap_put_tunnel; + entry->destructor_priv = entry->tunnel; + return 0; +} + int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts, bool rtnl_held) { @@ -3329,24 +3352,16 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->chain_index = tcf_gact_goto_chain_index(act); } else if (is_tcf_mirred_egress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT; - entry->dev = tcf_mirred_dev(act); - if (entry->dev) - dev_hold(entry->dev); + tcf_mirred_get_dev(entry, act); } else if (is_tcf_mirred_egress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED; - entry->dev = tcf_mirred_dev(act); - if (entry->dev) - dev_hold(entry->dev); + tcf_mirred_get_dev(entry, act); } else if (is_tcf_mirred_ingress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT_INGRESS; - entry->dev = tcf_mirred_dev(act); - if (entry->dev) - dev_hold(entry->dev); + tcf_mirred_get_dev(entry, act); } else if (is_tcf_mirred_ingress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED_INGRESS; - entry->dev = 
tcf_mirred_dev(act); - if (entry->dev) - dev_hold(entry->dev); + tcf_mirred_get_dev(entry, act); } else if (is_tcf_vlan(act)) { switch (tcf_vlan_action(act)) { case TCA_VLAN_ACT_PUSH: @@ -3370,11 +3385,9 @@ int tc_setup_flow_action(struct flow_action *flow_action, } } else if (is_tcf_tunnel_set(act)) { entry->id = FLOW_ACTION_TUNNEL_ENCAP; - entry->tunnel = tcf_tunnel_info_copy(act); - if (!entry->tunnel) { - err = -ENOMEM; + err = tcf_tunnel_encap_get_tunnel(entry, act); + if (err) goto err_out; - } } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else if (is_tcf_pedit(act)) { -- cgit v1.2.3 From 4a5da47d5cb6aba3c26a5cc0dddfb2d577e851e9 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 13 Sep 2019 18:28:40 +0300 Subject: net: sched: take reference to psample group in flow_action infra With the recent patch set that removed the rtnl lock dependency from the cls hardware offload API, the rtnl lock is only taken when reading action data and can be released after action-specific data is parsed into the intermediate representation. However, the sample action's psample group is passed by pointer without first obtaining a reference to it, which makes it possible to concurrently overwrite the action and deallocate the object pointed to by the psample_group pointer after the rtnl lock is released but before the driver has finished using the pointer. To prevent such a race condition, obtain a reference to the psample group while it is used by the flow_action infra. Extend the psample API with the function psample_group_take() that increments the psample group reference counter. Extend struct tc_action_ops with a new get_psample_group() API. Implement the API for the sample action using psample_group_take() and the already existing psample_group_put() as a destructor. Use it in tc_setup_flow_action() to take a reference to the psample group pointed to by entry->sample.psample_group and release it in tc_cleanup_flow_action(). Disable bh when taking psample_groups_lock.
The lock is now taken while holding action tcf_lock that is used by data path and requires bh to be disabled, so doing the same for psample_groups_lock is necessary to preserve SOFTIRQ-irq-safety. Fixes: 918190f50eb6 ("net: sched: flower: don't take rtnl lock for cls hw offloads API") Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/act_api.h | 5 +++++ include/net/psample.h | 1 + include/net/tc_act/tc_sample.h | 6 ------ net/psample/psample.c | 20 ++++++++++++++------ net/sched/act_sample.c | 27 +++++++++++++++++++++++++++ net/sched/cls_api.c | 13 +++++++++++-- 6 files changed, 58 insertions(+), 14 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/include/net/act_api.h b/include/net/act_api.h index 3a1a72990fce..4be8b0daedf0 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -78,6 +78,8 @@ static inline void tcf_tm_dump(struct tcf_t *dtm, const struct tcf_t *stm) #define ACT_P_CREATED 1 #define ACT_P_DELETED 1 +typedef void (*tc_action_priv_destructor)(void *priv); + struct tc_action_ops { struct list_head head; char kind[IFNAMSIZ]; @@ -101,6 +103,9 @@ struct tc_action_ops { size_t (*get_fill_size)(const struct tc_action *act); struct net_device *(*get_dev)(const struct tc_action *a); void (*put_dev)(struct net_device *dev); + struct psample_group * + (*get_psample_group)(const struct tc_action *a, + tc_action_priv_destructor *destructor); }; struct tc_action_net { diff --git a/include/net/psample.h b/include/net/psample.h index 6b578ce69cd8..68ae16bb0a4a 100644 --- a/include/net/psample.h +++ b/include/net/psample.h @@ -15,6 +15,7 @@ struct psample_group { }; struct psample_group *psample_group_get(struct net *net, u32 group_num); +void psample_group_take(struct psample_group *group); void psample_group_put(struct psample_group *group); #if IS_ENABLED(CONFIG_PSAMPLE) diff --git a/include/net/tc_act/tc_sample.h b/include/net/tc_act/tc_sample.h index b4fce0fae645..b5d76305e854 100644 --- 
a/include/net/tc_act/tc_sample.h +++ b/include/net/tc_act/tc_sample.h @@ -41,10 +41,4 @@ static inline int tcf_sample_trunc_size(const struct tc_action *a) return to_sample(a)->trunc_size; } -static inline struct psample_group * -tcf_sample_psample_group(const struct tc_action *a) -{ - return rcu_dereference_rtnl(to_sample(a)->psample_group); -} - #endif /* __NET_TC_SAMPLE_H */ diff --git a/net/psample/psample.c b/net/psample/psample.c index 66e4b61a350d..a6ceb0533b5b 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -73,7 +73,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, int idx = 0; int err; - spin_lock(&psample_groups_lock); + spin_lock_bh(&psample_groups_lock); list_for_each_entry(group, &psample_groups_list, list) { if (!net_eq(group->net, sock_net(msg->sk))) continue; @@ -89,7 +89,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, idx++; } - spin_unlock(&psample_groups_lock); + spin_unlock_bh(&psample_groups_lock); cb->args[0] = idx; return msg->len; } @@ -172,7 +172,7 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num) { struct psample_group *group; - spin_lock(&psample_groups_lock); + spin_lock_bh(&psample_groups_lock); group = psample_group_lookup(net, group_num); if (!group) { @@ -183,19 +183,27 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num) group->refcount++; out: - spin_unlock(&psample_groups_lock); + spin_unlock_bh(&psample_groups_lock); return group; } EXPORT_SYMBOL_GPL(psample_group_get); +void psample_group_take(struct psample_group *group) +{ + spin_lock_bh(&psample_groups_lock); + group->refcount++; + spin_unlock_bh(&psample_groups_lock); +} +EXPORT_SYMBOL_GPL(psample_group_take); + void psample_group_put(struct psample_group *group) { - spin_lock(&psample_groups_lock); + spin_lock_bh(&psample_groups_lock); if (--group->refcount == 0) psample_group_destroy(group); - spin_unlock(&psample_groups_lock); + spin_unlock_bh(&psample_groups_lock); } 
EXPORT_SYMBOL_GPL(psample_group_put); diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 10229124a992..692c4c9040fd 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -252,6 +252,32 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static void tcf_psample_group_put(void *priv) +{ + struct psample_group *group = priv; + + psample_group_put(group); +} + +static struct psample_group * +tcf_sample_get_group(const struct tc_action *a, + tc_action_priv_destructor *destructor) +{ + struct tcf_sample *s = to_sample(a); + struct psample_group *group; + + spin_lock_bh(&s->tcf_lock); + group = rcu_dereference_protected(s->psample_group, + lockdep_is_held(&s->tcf_lock)); + if (group) { + psample_group_take(group); + *destructor = tcf_psample_group_put; + } + spin_unlock_bh(&s->tcf_lock); + + return group; +} + static struct tc_action_ops act_sample_ops = { .kind = "sample", .id = TCA_ID_SAMPLE, @@ -262,6 +288,7 @@ static struct tc_action_ops act_sample_ops = { .cleanup = tcf_sample_cleanup, .walk = tcf_sample_walker, .lookup = tcf_sample_search, + .get_psample_group = tcf_sample_get_group, .size = sizeof(struct tcf_sample), }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index c668195379bd..60d44b14750a 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3324,6 +3324,16 @@ static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, return 0; } +static void tcf_sample_get_group(struct flow_action_entry *entry, + const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + entry->sample.psample_group = + act->ops->get_psample_group(act, &entry->destructor); + entry->destructor_priv = entry->sample.psample_group; +#endif +} + int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts, bool rtnl_held) { @@ -3417,11 +3427,10 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mark = 
tcf_skbedit_mark(act); } else if (is_tcf_sample(act)) { entry->id = FLOW_ACTION_SAMPLE; - entry->sample.psample_group = - tcf_sample_psample_group(act); entry->sample.trunc_size = tcf_sample_trunc_size(act); entry->sample.truncate = tcf_sample_truncate(act); entry->sample.rate = tcf_sample_rate(act); + tcf_sample_get_group(entry, act); } else if (is_tcf_police(act)) { entry->id = FLOW_ACTION_POLICE; entry->police.burst = tcf_police_tcfp_burst(act); -- cgit v1.2.3 From 470d5060e6b3b8fae47d944601855e9ece7a2470 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 13 Sep 2019 18:28:41 +0300 Subject: net: sched: use get_dev() action API in flow_action infra When filling in the hardware intermediate representation, tc_setup_flow_action() directly obtains, checks and takes a reference to the dev used by the mirred action, instead of using the act->ops->get_dev() API created specifically for this purpose. In order to remove code duplication, refactor the flow_action infra to use the action API when obtaining the mirred action target dev. Extend get_dev() with an additional argument that is used to provide the dev destructor to the user. Fixes: 5a6ff4b13d59 ("net: sched: take reference to action dev before calling offloads") Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S.
Miller --- include/net/act_api.h | 4 ++-- net/sched/act_mirred.c | 21 +++++++++++++-------- net/sched/cls_api.c | 13 +++---------- 3 files changed, 18 insertions(+), 20 deletions(-) (limited to 'net/sched/cls_api.c') diff --git a/include/net/act_api.h b/include/net/act_api.h index 4be8b0daedf0..b18c699681ca 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -101,8 +101,8 @@ struct tc_action_ops { struct netlink_ext_ack *); void (*stats_update)(struct tc_action *, u64, u32, u64, bool); size_t (*get_fill_size)(const struct tc_action *act); - struct net_device *(*get_dev)(const struct tc_action *a); - void (*put_dev)(struct net_device *dev); + struct net_device *(*get_dev)(const struct tc_action *a, + tc_action_priv_destructor *destructor); struct psample_group * (*get_psample_group)(const struct tc_action *a, tc_action_priv_destructor *destructor); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 9d1bf508075a..9ce073a05414 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -408,25 +408,31 @@ static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; -static struct net_device *tcf_mirred_get_dev(const struct tc_action *a) +static void tcf_mirred_dev_put(void *priv) +{ + struct net_device *dev = priv; + + dev_put(dev); +} + +static struct net_device * +tcf_mirred_get_dev(const struct tc_action *a, + tc_action_priv_destructor *destructor) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; rcu_read_lock(); dev = rcu_dereference(m->tcfm_dev); - if (dev) + if (dev) { dev_hold(dev); + *destructor = tcf_mirred_dev_put; + } rcu_read_unlock(); return dev; } -static void tcf_mirred_put_dev(struct net_device *dev) -{ - dev_put(dev); -} - static size_t tcf_mirred_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_mirred)); @@ -446,7 +452,6 @@ static struct tc_action_ops act_mirred_ops = { .get_fill_size = tcf_mirred_get_fill_size, .size = 
sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, - .put_dev = tcf_mirred_put_dev, }; static __net_init int mirred_init_net(struct net *net) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 60d44b14750a..32577c248968 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3288,22 +3288,15 @@ void tc_cleanup_flow_action(struct flow_action *flow_action) } EXPORT_SYMBOL(tc_cleanup_flow_action); -static void tcf_mirred_put_dev(void *priv) -{ - struct net_device *dev = priv; - - dev_put(dev); -} - static void tcf_mirred_get_dev(struct flow_action_entry *entry, const struct tc_action *act) { - entry->dev = tcf_mirred_dev(act); +#ifdef CONFIG_NET_CLS_ACT + entry->dev = act->ops->get_dev(act, &entry->destructor); if (!entry->dev) return; - dev_hold(entry->dev); - entry->destructor = tcf_mirred_put_dev; entry->destructor_priv = entry->dev; +#endif } static void tcf_tunnel_encap_put_tunnel(void *priv) -- cgit v1.2.3