diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 12:34:53 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-18 12:34:53 -0700 |
commit | 81160dda9a7aad13c04e78bb2cfd3c4630e3afab (patch) | |
tree | 4bf79ffa9fc7dc5e2915ff978778c3402c491113 /net/sched/sch_taprio.c | |
parent | 8b53c76533aa4356602aea98f98a2f3b4051464c (diff) | |
parent | 1bab8d4c488be22d57f9dd09968c90a0ddc413bf (diff) | |
download | linux-81160dda9a7aad13c04e78bb2cfd3c4630e3afab.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from David Miller:
1) Support IPV6 RA Captive Portal Identifier, from Maciej Żenczykowski.
2) Use bio_vec in the networking instead of custom skb_frag_t, from
Matthew Wilcox.
3) Make use of xmit_more in r8169 driver, from Heiner Kallweit.
4) Add devmap_hash to xdp, from Toke Høiland-Jørgensen.
5) Support all variants of 5750X bnxt_en chips, from Michael Chan.
6) More RTNL avoidance work in the core and mlx5 driver, from Vlad
Buslov.
7) Add TCP syn cookies bpf helper, from Petar Penkov.
8) Add 'nettest' to selftests and use it, from David Ahern.
9) Add extack support to drop_monitor, add packet alert mode and
support for HW drops, from Ido Schimmel.
10) Add VLAN offload to stmmac, from Jose Abreu.
11) Lots of devm_platform_ioremap_resource() conversions, from
YueHaibing.
12) Add IONIC driver, from Shannon Nelson.
13) Several kTLS cleanups, from Jakub Kicinski.
* git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1930 commits)
mlxsw: spectrum_buffers: Add the ability to query the CPU port's shared buffer
mlxsw: spectrum: Register CPU port with devlink
mlxsw: spectrum_buffers: Prevent changing CPU port's configuration
net: ena: fix incorrect update of intr_delay_resolution
net: ena: fix retrieval of nonadaptive interrupt moderation intervals
net: ena: fix update of interrupt moderation register
net: ena: remove all old adaptive rx interrupt moderation code from ena_com
net: ena: remove ena_restore_ethtool_params() and relevant fields
net: ena: remove old adaptive interrupt moderation code from ena_netdev
net: ena: remove code duplication in ena_com_update_nonadaptive_moderation_interval _*()
net: ena: enable the interrupt_moderation in driver_supported_features
net: ena: reimplement set/get_coalesce()
net: ena: switch to dim algorithm for rx adaptive interrupt moderation
net: ena: add intr_moder_rx_interval to struct ena_com_dev and use it
net: phy: adin: implement Energy Detect Powerdown mode via phy-tunable
ethtool: implement Energy Detect Powerdown support via phy-tunable
xen-netfront: do not assume sk_buff_head list is empty in error handling
s390/ctcm: Delete unnecessary checks before the macro call “dev_kfree_skb”
net: ena: don't wake up tx queue when down
drop_monitor: Better sanitize notified packets
...
Diffstat (limited to 'net/sched/sch_taprio.c')
-rw-r--r-- | net/sched/sch_taprio.c | 413 |
1 files changed, 366 insertions, 47 deletions
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8d8bc2ec5cd6..2f7b34205c82 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -29,8 +29,8 @@ static DEFINE_SPINLOCK(taprio_list_lock); #define TAPRIO_ALL_GATES_OPEN -1 -#define FLAGS_VALID(flags) (!((flags) & ~TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)) #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) +#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) struct sched_entry { struct list_head list; @@ -75,9 +75,16 @@ struct taprio_sched { struct sched_gate_list __rcu *admin_sched; struct hrtimer advance_timer; struct list_head taprio_list; + struct sk_buff *(*dequeue)(struct Qdisc *sch); + struct sk_buff *(*peek)(struct Qdisc *sch); u32 txtime_delay; }; +struct __tc_taprio_qopt_offload { + refcount_t users; + struct tc_taprio_qopt_offload offload; +}; + static ktime_t sched_base_time(const struct sched_gate_list *sched) { if (!sched) @@ -268,6 +275,19 @@ static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch) return entry; } +static bool taprio_flags_valid(u32 flags) +{ + /* Make sure no other flag bits are set. */ + if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | + TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) + return false; + /* txtime-assist and full offload are mutually exclusive */ + if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) && + (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) + return false; + return true; +} + /* This returns the tstamp value set by TCP in terms of the set clock. */ static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb) { @@ -417,7 +437,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_enqueue(skb, child, to_free); } -static struct sk_buff *taprio_peek(struct Qdisc *sch) +static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -461,6 +481,36 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) return NULL; } +static struct sk_buff *taprio_peek_offload(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + + if (unlikely(!child)) + continue; + + skb = child->ops->peek(child); + if (!skb) + continue; + + return skb; + } + + return NULL; +} + +static struct sk_buff *taprio_peek(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + + return q->peek(sch); +} + static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) { atomic_set(&entry->budget, @@ -468,7 +518,7 @@ static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) atomic64_read(&q->picos_per_byte))); } -static struct sk_buff *taprio_dequeue(struct Qdisc *sch) +static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -550,6 +600,40 @@ done: return skb; } +static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + + if (unlikely(!child)) + continue; + + skb = child->ops->dequeue(child); + if (unlikely(!skb)) + continue; + + qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; + + return skb; + } + + return NULL; +} + +static struct sk_buff *taprio_dequeue(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + + return q->dequeue(sch); +} + static bool should_restart_cycle(const struct sched_gate_list *oper, const struct sched_entry *entry) { @@ -672,10 +756,6 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, }; -static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { - [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, -}; - static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@ -936,6 +1016,9 @@ static void taprio_start_sched(struct Qdisc *sch, struct taprio_sched *q = qdisc_priv(sch); ktime_t expires; + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) + return; + expires = hrtimer_get_expires(&q->advance_timer); if (expires == 0) expires = KTIME_MAX; @@ -1015,6 +1098,254 @@ static void setup_txtime(struct taprio_sched *q, } } +static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries) +{ + size_t size = sizeof(struct tc_taprio_sched_entry) * num_entries + + sizeof(struct __tc_taprio_qopt_offload); + struct __tc_taprio_qopt_offload *__offload; + + __offload = kzalloc(size, GFP_KERNEL); + if (!__offload) + return NULL; + + refcount_set(&__offload->users, 1); + + return &__offload->offload; +} + +struct tc_taprio_qopt_offload *taprio_offload_get(struct tc_taprio_qopt_offload + *offload) +{ + struct __tc_taprio_qopt_offload *__offload; + + __offload = container_of(offload, struct __tc_taprio_qopt_offload, + offload); + + refcount_inc(&__offload->users); + + return offload; +} +EXPORT_SYMBOL_GPL(taprio_offload_get); + +void taprio_offload_free(struct tc_taprio_qopt_offload *offload) +{ + struct __tc_taprio_qopt_offload *__offload; + + __offload = container_of(offload, struct __tc_taprio_qopt_offload, + offload); + + if (!refcount_dec_and_test(&__offload->users)) + return; + + kfree(__offload); +} +EXPORT_SYMBOL_GPL(taprio_offload_free); + +/* The function will only serve to keep the pointers to the "oper" and "admin" + * schedules valid in relation to their base times, so when calling dump() the + * users looks at the right schedules. + * When using full offload, the admin configuration is promoted to oper at the + * base_time in the PHC time domain. But because the system time is not + * necessarily in sync with that, we can't just trigger a hrtimer to call + * switch_schedules at the right hardware time. + * At the moment we call this by hand right away from taprio, but in the future + * it will be useful to create a mechanism for drivers to notify taprio of the + * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump(). + * This is left as TODO. + */ +void taprio_offload_config_changed(struct taprio_sched *q) +{ + struct sched_gate_list *oper, *admin; + + spin_lock(&q->current_entry_lock); + + oper = rcu_dereference_protected(q->oper_sched, + lockdep_is_held(&q->current_entry_lock)); + admin = rcu_dereference_protected(q->admin_sched, + lockdep_is_held(&q->current_entry_lock)); + + switch_schedules(q, &admin, &oper); + + spin_unlock(&q->current_entry_lock); +} + +static void taprio_sched_to_offload(struct taprio_sched *q, + struct sched_gate_list *sched, + const struct tc_mqprio_qopt *mqprio, + struct tc_taprio_qopt_offload *offload) +{ + struct sched_entry *entry; + int i = 0; + + offload->base_time = sched->base_time; + offload->cycle_time = sched->cycle_time; + offload->cycle_time_extension = sched->cycle_time_extension; + + list_for_each_entry(entry, &sched->entries, list) { + struct tc_taprio_sched_entry *e = &offload->entries[i]; + + e->command = entry->command; + e->interval = entry->interval; + e->gate_mask = entry->gate_mask; + i++; + } + + offload->num_entries = i; +} + +static int taprio_enable_offload(struct net_device *dev, + struct tc_mqprio_qopt *mqprio, + struct taprio_sched *q, + struct sched_gate_list *sched, + struct netlink_ext_ack *extack) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_taprio_qopt_offload *offload; + int err = 0; + + if (!ops->ndo_setup_tc) { + NL_SET_ERR_MSG(extack, + "Device does not support taprio offload"); + return -EOPNOTSUPP; + } + + offload = taprio_offload_alloc(sched->num_entries); + if (!offload) { + NL_SET_ERR_MSG(extack, + "Not enough memory for enabling offload mode"); + return -ENOMEM; + } + offload->enable = 1; + taprio_sched_to_offload(q, sched, mqprio, offload); + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); + if (err < 0) { + NL_SET_ERR_MSG(extack, + "Device failed to setup taprio offload"); + goto done; + } + + taprio_offload_config_changed(q); + +done: + taprio_offload_free(offload); + + return err; +} + +static int taprio_disable_offload(struct net_device *dev, + struct taprio_sched *q, + struct netlink_ext_ack *extack) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_taprio_qopt_offload *offload; + int err; + + if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) + return 0; + + if (!ops->ndo_setup_tc) + return -EOPNOTSUPP; + + offload = taprio_offload_alloc(0); + if (!offload) { + NL_SET_ERR_MSG(extack, + "Not enough memory to disable offload mode"); + return -ENOMEM; + } + offload->enable = 0; + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); + if (err < 0) { + NL_SET_ERR_MSG(extack, + "Device failed to disable offload"); + goto out; + } + +out: + taprio_offload_free(offload); + + return err; +} + +/* If full offload is enabled, the only possible clockid is the net device's + * PHC. For that reason, specifying a clockid through netlink is incorrect. + * For txtime-assist, it is implicitly assumed that the device's PHC is kept + * in sync with the specified clockid via a user space daemon such as phc2sys. + * For both software taprio and txtime-assist, the clockid is used for the + * hrtimer that advances the schedule and hence mandatory. + */ +static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int err = -EINVAL; + + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_ts_info info = { + .cmd = ETHTOOL_GET_TS_INFO, + .phc_index = -1, + }; + + if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + NL_SET_ERR_MSG(extack, + "The 'clockid' cannot be specified for full offload"); + goto out; + } + + if (ops && ops->get_ts_info) + err = ops->get_ts_info(dev, &info); + + if (err || info.phc_index < 0) { + NL_SET_ERR_MSG(extack, + "Device does not have a PTP clock"); + err = -ENOTSUPP; + goto out; + } + } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + + /* We only support static clockids and we don't allow + * for it to be modified after the first init. + */ + if (clockid < 0 || + (q->clockid != -1 && q->clockid != clockid)) { + NL_SET_ERR_MSG(extack, + "Changing the 'clockid' of a running schedule is not supported"); + err = -ENOTSUPP; + goto out; + } + + switch (clockid) { + case CLOCK_REALTIME: + q->tk_offset = TK_OFFS_REAL; + break; + case CLOCK_MONOTONIC: + q->tk_offset = TK_OFFS_MAX; + break; + case CLOCK_BOOTTIME: + q->tk_offset = TK_OFFS_BOOT; + break; + case CLOCK_TAI: + q->tk_offset = TK_OFFS_TAI; + break; + default: + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + err = -EINVAL; + goto out; + } + + q->clockid = clockid; + } else { + NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory"); + goto out; + } +out: + return err; +} + static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -1024,9 +1355,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct net_device *dev = qdisc_dev(sch); struct tc_mqprio_qopt *mqprio = NULL; u32 taprio_flags = 0; - int i, err, clockid; unsigned long flags; ktime_t start; + int i, err; err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt, taprio_policy, extack); @@ -1042,7 +1373,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (q->flags != 0 && q->flags != taprio_flags) { NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); return -EOPNOTSUPP; - } else if (!FLAGS_VALID(taprio_flags)) { + } else if (!taprio_flags_valid(taprio_flags)) { NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); return -EINVAL; } @@ -1082,30 +1413,19 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto free_sched; } - if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { - clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); - - /* We only support static clockids and we don't allow - * for it to be modified after the first init. - */ - if (clockid < 0 || - (q->clockid != -1 && q->clockid != clockid)) { - NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported"); - err = -ENOTSUPP; - goto free_sched; - } - - q->clockid = clockid; - } - - if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { - NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory"); - err = -EINVAL; + err = taprio_parse_clockid(sch, tb, extack); + if (err < 0) goto free_sched; - } taprio_set_picos_per_byte(dev, q); + if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) + err = taprio_enable_offload(dev, mqprio, q, new_admin, extack); + else + err = taprio_disable_offload(dev, q, extack); + if (err) + goto free_sched; + /* Protects against enqueue()/dequeue() */ spin_lock_bh(qdisc_lock(sch)); @@ -1120,6 +1440,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, } if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) && + !FULL_OFFLOAD_IS_ENABLED(taprio_flags) && !hrtimer_active(&q->advance_timer)) { hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); q->advance_timer.function = advance_sched; @@ -1138,23 +1459,15 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, mqprio->prio_tc_map[i]); } - switch (q->clockid) { - case CLOCK_REALTIME: - q->tk_offset = TK_OFFS_REAL; - break; - case CLOCK_MONOTONIC: - q->tk_offset = TK_OFFS_MAX; - break; - case CLOCK_BOOTTIME: - q->tk_offset = TK_OFFS_BOOT; - break; - case CLOCK_TAI: - q->tk_offset = TK_OFFS_TAI; - break; - default: - NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); - err = -EINVAL; - goto unlock; + if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) { + q->dequeue = taprio_dequeue_offload; + q->peek = taprio_peek_offload; + } else { + /* Be sure to always keep the function pointers + * in a consistent state. + */ + q->dequeue = taprio_dequeue_soft; + q->peek = taprio_peek_soft; } err = taprio_get_start_time(sch, new_admin, &start); @@ -1216,6 +1529,8 @@ static void taprio_destroy(struct Qdisc *sch) hrtimer_cancel(&q->advance_timer); + taprio_disable_offload(dev, q, NULL); + if (q->qdiscs) { for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) qdisc_put(q->qdiscs[i]); @@ -1245,6 +1560,9 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); q->advance_timer.function = advance_sched; + q->dequeue = taprio_dequeue_soft; + q->peek = taprio_peek_soft; + q->root = sch; /* We only support static clockids. Use an invalid value as default @@ -1427,7 +1745,8 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt)) goto options_error; - if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) + if (!FULL_OFFLOAD_IS_ENABLED(q->flags) && + nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) goto options_error; if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags)) |