diff options
author | David S. Miller <davem@davemloft.net> | 2018-01-14 12:21:12 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-01-14 12:21:12 -0500 |
commit | 5d498214afced560f13c1b0610583fc883439753 (patch) | |
tree | b5ce274ec5fddee527cc5195f4dba2c6098eb18a | |
parent | 1988c7957881a83111599273d3f335dd57a70a3e (diff) | |
parent | 93d8a4c1b5f55855ae3ef7063ce79af13212f96f (diff) | |
download | linux-5d498214afced560f13c1b0610583fc883439753.tar.bz2 |
Merge branch 'mlxsw-Offload-PRIO-qdisc'
Jiri Pirko says:
====================
mlxsw: Offload PRIO qdisc
Nogah says:
Add an offload support for PRIO qdisc for mlxsw driver.
PRIO qdisc is being offloaded by using ndo_setup_tc. It has three
commands, to set or tune the qdisc, to remove it and to get its stats.
Like RED offloading, offloading this qdisc is not enforced on the driver
and determining its offload state is done in the dump action, when the
stats are being updated.
In the driver, offloading of PRIO is supported as root qdisc only. It
supports only priorities 0-7 (the range that is used by the current static
mapping of DSCP to skb prio and by 1:1 PCP values mapping) and up to 8
bands.
Patches 1-2 offload DSCP to priority mapping in the mlxsw_sp driver.
Patch 3 adds offload support for PRIO qdisc.
Patches 4-5 Add PRIO offload support in the mlxsw_sp driver.
---
v1->v2:
- Patch 1/5:
- Rewrite patch msg
- Patch 3/5:
- Send all the qstats in the replace command (and not just backlog)
- Patch 5/5:
- Align with the changes from 3/5
- Move backlog to the generic qdisc stats struct
- Delete extra newline
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/item.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/reg.h | 37 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum.h | 2 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c | 171 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 24 | ||||
-rw-r--r-- | include/linux/netdevice.h | 1 | ||||
-rw-r--r-- | include/net/pkt_cls.h | 25 | ||||
-rw-r--r-- | net/sched/sch_prio.c | 59 |
9 files changed, 322 insertions, 1 deletions
diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h b/drivers/net/ethernet/mellanox/mlxsw/item.h index 28427f0758c7..31c886edc791 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/item.h +++ b/drivers/net/ethernet/mellanox/mlxsw/item.h @@ -42,7 +42,7 @@ struct mlxsw_item { unsigned short offset; /* bytes in container */ - unsigned short step; /* step in bytes for indexed items */ + short step; /* step in bytes for indexed items */ unsigned short in_step_offset; /* offset within one step */ unsigned char shift; /* shift in bits */ unsigned char element_size; /* size of element in bit array */ diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 6c4e08b8058a..0e08be41c8e0 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -4827,6 +4827,42 @@ static inline void mlxsw_reg_ratr_counter_pack(char *payload, u64 counter_index, mlxsw_reg_ratr_counter_set_type_set(payload, set_type); } +/* RDPM - Router DSCP to Priority Mapping + * -------------------------------------- + * Controls the mapping from DSCP field to switch priority on routed packets + */ +#define MLXSW_REG_RDPM_ID 0x8009 +#define MLXSW_REG_RDPM_BASE_LEN 0x00 +#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN 0x01 +#define MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT 64 +#define MLXSW_REG_RDPM_LEN 0x40 +#define MLXSW_REG_RDPM_LAST_ENTRY (MLXSW_REG_RDPM_BASE_LEN + \ + MLXSW_REG_RDPM_LEN - \ + MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN) + +MLXSW_REG_DEFINE(rdpm, MLXSW_REG_RDPM_ID, MLXSW_REG_RDPM_LEN); + +/* reg_dscp_entry_e + * Enable update of the specific entry + * Access: Index + */ +MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_e, MLXSW_REG_RDPM_LAST_ENTRY, 7, 1, + -MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false); + +/* reg_dscp_entry_prio + * Switch Priority + * Access: RW + */ +MLXSW_ITEM8_INDEXED(reg, rdpm, dscp_entry_prio, MLXSW_REG_RDPM_LAST_ENTRY, 0, 4, + -MLXSW_REG_RDPM_DSCP_ENTRY_REC_LEN, 0x00, false); + +static inline void mlxsw_reg_rdpm_pack(char *payload, unsigned short index, + u8 prio) +{ + mlxsw_reg_rdpm_dscp_entry_e_set(payload, index, 1); + mlxsw_reg_rdpm_dscp_entry_prio_set(payload, index, prio); +} + /* RICNT - Router Interface Counter Register * ----------------------------------------- * The RICNT register retrieves per port performance counters @@ -7640,6 +7676,7 @@ static const struct mlxsw_reg_info *mlxsw_reg_infos[] = { MLXSW_REG(rtar), MLXSW_REG(ratr), MLXSW_REG(rtdp), + MLXSW_REG(rdpm), MLXSW_REG(ricnt), MLXSW_REG(rrcr), MLXSW_REG(ralta), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 54c7d9202e81..f78bfe394966 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -1830,6 +1830,8 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type, return mlxsw_sp_setup_tc_block(mlxsw_sp_port, type_data); case TC_SETUP_QDISC_RED: return mlxsw_sp_setup_tc_red(mlxsw_sp_port, type_data); + case TC_SETUP_QDISC_PRIO: + return mlxsw_sp_setup_tc_prio(mlxsw_sp_port, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index b6f475e83474..16f8fbda0891 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -565,6 +565,8 @@ int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port); void mlxsw_sp_tc_qdisc_fini(struct mlxsw_sp_port *mlxsw_sp_port); int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, struct tc_red_qopt_offload *p); +int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_prio_qopt_offload *p); /* spectrum_fid.c */ int mlxsw_sp_fid_flood_set(struct mlxsw_sp_fid *fid, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c index 971f689dd833..e11a0abfc663 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_qdisc.c @@ -41,9 +41,12 @@ #include "spectrum.h" #include "reg.h" +#define MLXSW_SP_PRIO_BAND_TO_TCLASS(band) (IEEE_8021QAZ_MAX_TCS - band - 1) + enum mlxsw_sp_qdisc_type { MLXSW_SP_QDISC_NO_QDISC, MLXSW_SP_QDISC_RED, + MLXSW_SP_QDISC_PRIO, }; struct mlxsw_sp_qdisc_ops { @@ -63,6 +66,11 @@ struct mlxsw_sp_qdisc_ops { void *xstats_ptr); void (*clean_stats)(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp_qdisc *mlxsw_sp_qdisc); + /* unoffload - to be used for a qdisc that stops being offloaded without + * being destroyed. + */ + void (*unoffload)(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, void *params); }; struct mlxsw_sp_qdisc { @@ -76,6 +84,7 @@ struct mlxsw_sp_qdisc { u64 tx_packets; u64 drops; u64 overlimits; + u64 backlog; } stats_base; struct mlxsw_sp_qdisc_ops *ops; @@ -141,6 +150,9 @@ mlxsw_sp_qdisc_replace(struct mlxsw_sp_port *mlxsw_sp_port, u32 handle, err_bad_param: err_config: + if (mlxsw_sp_qdisc->handle == handle && ops->unoffload) + ops->unoffload(mlxsw_sp_port, mlxsw_sp_qdisc, params); + mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); return err; } @@ -403,6 +415,165 @@ int mlxsw_sp_setup_tc_red(struct mlxsw_sp_port *mlxsw_sp_port, } } +static int +mlxsw_sp_qdisc_prio_destroy(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + int i; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, + MLXSW_SP_PORT_DEFAULT_TCLASS); + + return 0; +} + +static int +mlxsw_sp_qdisc_prio_check_params(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + + if (p->bands > IEEE_8021QAZ_MAX_TCS) + return -EOPNOTSUPP; + + return 0; +} + +static int +mlxsw_sp_qdisc_prio_replace(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + int tclass, i; + int err; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + tclass = MLXSW_SP_PRIO_BAND_TO_TCLASS(p->priomap[i]); + err = mlxsw_sp_port_prio_tc_set(mlxsw_sp_port, i, tclass); + if (err) + return err; + } + + return 0; +} + +void +mlxsw_sp_qdisc_prio_unoffload(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + void *params) +{ + struct tc_prio_qopt_offload_params *p = params; + u64 backlog; + + backlog = mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + mlxsw_sp_qdisc->stats_base.backlog); + p->qstats->backlog -= backlog; +} + +static int +mlxsw_sp_qdisc_get_prio_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc, + struct tc_qopt_offload_stats *stats_ptr) +{ + u64 tx_bytes, tx_packets, drops = 0, backlog = 0; + struct mlxsw_sp_qdisc_stats *stats_base; + struct mlxsw_sp_port_xstats *xstats; + struct rtnl_link_stats64 *stats; + int i; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats = &mlxsw_sp_port->periodic_hw_stats.stats; + stats_base = &mlxsw_sp_qdisc->stats_base; + + tx_bytes = stats->tx_bytes - stats_base->tx_bytes; + tx_packets = stats->tx_packets - stats_base->tx_packets; + + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) { + drops += xstats->tail_drop[i]; + backlog += xstats->backlog[i]; + } + drops = drops - stats_base->drops; + + _bstats_update(stats_ptr->bstats, tx_bytes, tx_packets); + stats_ptr->qstats->drops += drops; + stats_ptr->qstats->backlog += + mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + backlog) - + mlxsw_sp_cells_bytes(mlxsw_sp_port->mlxsw_sp, + stats_base->backlog); + stats_base->backlog = backlog; + stats_base->drops += drops; + stats_base->tx_bytes += tx_bytes; + stats_base->tx_packets += tx_packets; + return 0; +} + +static void +mlxsw_sp_setup_tc_qdisc_prio_clean_stats(struct mlxsw_sp_port *mlxsw_sp_port, + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc) +{ + struct mlxsw_sp_qdisc_stats *stats_base; + struct mlxsw_sp_port_xstats *xstats; + struct rtnl_link_stats64 *stats; + int i; + + xstats = &mlxsw_sp_port->periodic_hw_stats.xstats; + stats = &mlxsw_sp_port->periodic_hw_stats.stats; + stats_base = &mlxsw_sp_qdisc->stats_base; + + stats_base->tx_packets = stats->tx_packets; + stats_base->tx_bytes = stats->tx_bytes; + + stats_base->drops = 0; + for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) + stats_base->drops += xstats->tail_drop[i]; + + mlxsw_sp_qdisc->stats_base.backlog = 0; +} + +static struct mlxsw_sp_qdisc_ops mlxsw_sp_qdisc_ops_prio = { + .type = MLXSW_SP_QDISC_PRIO, + .check_params = mlxsw_sp_qdisc_prio_check_params, + .replace = mlxsw_sp_qdisc_prio_replace, + .unoffload = mlxsw_sp_qdisc_prio_unoffload, + .destroy = mlxsw_sp_qdisc_prio_destroy, + .get_stats = mlxsw_sp_qdisc_get_prio_stats, + .clean_stats = mlxsw_sp_setup_tc_qdisc_prio_clean_stats, +}; + +int mlxsw_sp_setup_tc_prio(struct mlxsw_sp_port *mlxsw_sp_port, + struct tc_prio_qopt_offload *p) +{ + struct mlxsw_sp_qdisc *mlxsw_sp_qdisc; + + if (p->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + mlxsw_sp_qdisc = mlxsw_sp_port->root_qdisc; + if (p->command == TC_PRIO_REPLACE) + return mlxsw_sp_qdisc_replace(mlxsw_sp_port, p->handle, + mlxsw_sp_qdisc, + &mlxsw_sp_qdisc_ops_prio, + &p->replace_params); + + if (!mlxsw_sp_qdisc_compare(mlxsw_sp_qdisc, p->handle, + MLXSW_SP_QDISC_PRIO)) + return -EOPNOTSUPP; + + switch (p->command) { + case TC_PRIO_DESTROY: + return mlxsw_sp_qdisc_destroy(mlxsw_sp_port, mlxsw_sp_qdisc); + case TC_PRIO_STATS: + return mlxsw_sp_qdisc_get_stats(mlxsw_sp_port, mlxsw_sp_qdisc, + &p->stats); + default: + return -EOPNOTSUPP; + } +} + int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port) { mlxsw_sp_port->root_qdisc = kzalloc(sizeof(*mlxsw_sp_port->root_qdisc), diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 7a136256b8f7..01ff5ba6796e 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -7011,6 +7011,24 @@ static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp) } #endif +static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp) +{ + char rdpm_pl[MLXSW_REG_RDPM_LEN]; + unsigned int i; + + MLXSW_REG_ZERO(rdpm, rdpm_pl); + + /* HW is determining switch priority based on DSCP-bits, but the + * kernel is still doing that based on the ToS. Since there's a + * mismatch in bits we need to make sure to translate the right + * value ToS would observe, skipping the 2 least-significant ECN bits. + */ + for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++) + mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2)); + + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl); +} + static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) { char rgcr_pl[MLXSW_REG_RGCR_LEN]; @@ -7023,6 +7041,7 @@ static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) mlxsw_reg_rgcr_pack(rgcr_pl, true, true); mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs); + mlxsw_reg_rgcr_usp_set(rgcr_pl, true); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); if (err) return err; @@ -7098,6 +7117,10 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_mp_hash_init; + err = mlxsw_sp_dscp_init(mlxsw_sp); + if (err) + goto err_dscp_init; + mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event; err = register_fib_notifier(&mlxsw_sp->router->fib_nb, mlxsw_sp_router_fib_dump_flush); @@ -7107,6 +7130,7 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) return 0; err_register_fib_notifier: +err_dscp_init: err_mp_hash_init: unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb); err_register_netevent_notifier: diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef7b348e8498..6d95477b962c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -780,6 +780,7 @@ enum tc_setup_type { TC_SETUP_BLOCK, TC_SETUP_QDISC_CBS, TC_SETUP_QDISC_RED, + TC_SETUP_QDISC_PRIO, }; /* These structures hold the attributes of bpf state that are being passed diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0d1343cba84c..9c341f003091 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -761,4 +761,29 @@ struct tc_red_qopt_offload { }; }; +enum tc_prio_command { + TC_PRIO_REPLACE, + TC_PRIO_DESTROY, + TC_PRIO_STATS, +}; + +struct tc_prio_qopt_offload_params { + int bands; + u8 priomap[TC_PRIO_MAX + 1]; + /* In case that a prio qdisc is offloaded and now is changed to a + * non-offloadedable config, it needs to update the backlog & qlen + * values to negate the HW backlog & qlen values (and only them). + */ + struct gnet_stats_queue *qstats; +}; + +struct tc_prio_qopt_offload { + enum tc_prio_command command; + u32 handle; + u32 parent; + union { + struct tc_prio_qopt_offload_params replace_params; + struct tc_qopt_offload_stats stats; + }; +}; #endif diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fe1510eb111f..a398502899a9 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -142,6 +142,31 @@ prio_reset(struct Qdisc *sch) sch->q.qlen = 0; } +static int prio_offload(struct Qdisc *sch, bool enable) +{ + struct prio_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_prio_qopt_offload opt = { + .handle = sch->handle, + .parent = sch->parent, + }; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + if (enable) { + opt.command = TC_PRIO_REPLACE; + opt.replace_params.bands = q->bands; + memcpy(&opt.replace_params.priomap, q->prio2band, + TC_PRIO_MAX + 1); + opt.replace_params.qstats = &sch->qstats; + } else { + opt.command = TC_PRIO_DESTROY; + } + + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, &opt); +} + static void prio_destroy(struct Qdisc *sch) { @@ -149,6 +174,7 @@ prio_destroy(struct Qdisc *sch) struct prio_sched_data *q = qdisc_priv(sch); tcf_block_put(q->block); + prio_offload(sch, false); for (prio = 0; prio < q->bands; prio++) qdisc_destroy(q->queues[prio]); } @@ -204,6 +230,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt, } sch_tree_unlock(sch); + prio_offload(sch, true); return 0; } @@ -223,15 +250,47 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt, return prio_tune(sch, opt, extack); } +static int prio_dump_offload(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_prio_qopt_offload hw_stats = { + .handle = sch->handle, + .parent = sch->parent, + .command = TC_PRIO_STATS, + .stats.bstats = &sch->bstats, + .stats.qstats = &sch->qstats, + }; + int err; + + sch->flags &= ~TCQ_F_OFFLOADED; + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return 0; + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, + &hw_stats); + if (err == -EOPNOTSUPP) + return 0; + + if (!err) + sch->flags |= TCQ_F_OFFLOADED; + + return err; +} + static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct prio_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_prio_qopt opt; + int err; opt.bands = q->bands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1); + err = prio_dump_offload(sch); + if (err) + goto nla_put_failure; + if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) goto nla_put_failure; |