-rw-r--r-- | include/linux/pkt_sched.h |  20
-rw-r--r-- | include/net/red.h         |   3
-rw-r--r-- | net/sched/sch_sfq.c       | 146
3 files changed, 152 insertions, 17 deletions
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 8f1b928f777c..0d5b79365d03 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -162,10 +162,30 @@ struct tc_sfq_qopt {
 	unsigned	flows;		/* Maximal number of flows  */
 };
 
+struct tc_sfqred_stats {
+	__u32	prob_drop;	/* Early drops, below max threshold */
+	__u32	forced_drop;	/* Early drops, after max threshold */
+	__u32	prob_mark;	/* Marked packets, below max threshold */
+	__u32	forced_mark;	/* Marked packets, after max threshold */
+	__u32	prob_mark_head;	/* Marked packets, below max threshold */
+	__u32	forced_mark_head;/* Marked packets, after max threshold */
+};
+
 struct tc_sfq_qopt_v1 {
 	struct tc_sfq_qopt v0;
 	unsigned int	depth;		/* max number of packets per flow */
 	unsigned int	headdrop;
+/* SFQRED parameters */
+	__u32		limit;		/* HARD maximal flow queue length (bytes) */
+	__u32		qth_min;	/* Min average length threshold (bytes) */
+	__u32		qth_max;	/* Max average length threshold (bytes) */
+	unsigned char	Wlog;		/* log(W) */
+	unsigned char	Plog;		/* log(P_max/(qth_max-qth_min)) */
+	unsigned char	Scell_log;	/* cell size for idle damping */
+	unsigned char	flags;
+	__u32		max_P;		/* probability, high resolution */
+/* SFQRED stats */
+	struct tc_sfqred_stats stats;
 };
diff --git a/include/net/red.h b/include/net/red.h
index baab385a4736..28068ec614b2 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -199,7 +199,8 @@ static inline void red_set_parms(struct red_parms *p,
 	p->Scell_log	= Scell_log;
 	p->Scell_max	= (255 << Scell_log);
 
-	memcpy(p->Stab, stab, sizeof(p->Stab));
+	if (stab)
+		memcpy(p->Stab, stab, sizeof(p->Stab));
 }
 
 static inline int red_is_idling(const struct red_vars *v)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 0a7964009e8c..67494aef9acf 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -24,6 +24,7 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 #include <net/flow_keys.h>
+#include <net/red.h>
 
 
 /*	Stochastic Fairness Queuing algorithm.
@@ -108,24 +109,30 @@ struct sfq_slot {
 	struct sfq_head	dep; /* anchor in dep[] chains */
 	unsigned short	hash; /* hash value (index in ht[]) */
 	short		allot; /* credit for this slot */
+
+	unsigned int	backlog;
+	struct red_vars vars;
 };
 
 struct sfq_sched_data {
 /* frequently used fields */
 	int		limit;		/* limit of total number of packets in this qdisc */
 	unsigned int	divisor;	/* number of slots in hash table */
-	unsigned int	maxflows;	/* number of flows in flows array */
-	int		headdrop;
-	int		maxdepth;	/* limit of packets per flow */
+	u8		headdrop;
+	u8		maxdepth;	/* limit of packets per flow */
 
 	u32		perturbation;
-	struct tcf_proto *filter_list;
-	sfq_index	cur_depth;	/* depth of longest slot */
+	u8		cur_depth;	/* depth of longest slot */
+	u8		flags;
 	unsigned short	scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
-	struct sfq_slot *tail;		/* current slot in round */
+	struct tcf_proto *filter_list;
 	sfq_index	*ht;		/* Hash table ('divisor' slots) */
 	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
 
+	struct red_parms *red_parms;
+	struct tc_sfqred_stats stats;
+	struct sfq_slot *tail;		/* current slot in round */
+
 	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
 					/* Linked lists of slots, indexed by depth
 					 * dep[0] : list of unused flows
@@ -133,6 +140,7 @@ struct sfq_sched_data {
 					 * dep[X] : list of flows with X packets
 					 */
 
+	unsigned int	maxflows;	/* number of flows in flows array */
 	int		perturb_period;
 	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
 	struct timer_list perturb_timer;
@@ -321,6 +329,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 drop:
 		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
+		slot->backlog -= len;
 		sfq_dec(q, x);
 		kfree_skb(skb);
 		sch->q.qlen--;
@@ -341,6 +350,23 @@ drop:
 	return 0;
 }
 
+/* Is ECN parameter configured */
+static int sfq_prob_mark(const struct sfq_sched_data *q)
+{
+	return q->flags & TC_RED_ECN;
+}
+
+/* Should packets over max threshold just be marked */
+static int sfq_hard_mark(const struct sfq_sched_data *q)
+{
+	return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
+}
+
+static int sfq_headdrop(const struct sfq_sched_data *q)
+{
+	return q->headdrop;
+}
+
 static int
 sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 {
@@ -349,6 +375,8 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	sfq_index x, qlen;
 	struct sfq_slot *slot;
 	int uninitialized_var(ret);
+	struct sk_buff *head;
+	int delta;
 
 	hash = sfq_classify(skb, sch, &ret);
 	if (hash == 0) {
@@ -368,24 +396,75 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->ht[hash] = x;
 		slot = &q->slots[x];
 		slot->hash = hash;
+		slot->backlog = 0; /* should already be 0 anyway... */
+		red_set_vars(&slot->vars);
+		goto enqueue;
 	}
+	if (q->red_parms) {
+		slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
+							&slot->vars,
+							slot->backlog);
+		switch (red_action(q->red_parms,
+				   &slot->vars,
+				   slot->vars.qavg)) {
+		case RED_DONT_MARK:
+			break;
 
-	if (slot->qlen >= q->maxdepth) {
-		struct sk_buff *head;
+		case RED_PROB_MARK:
+			sch->qstats.overlimits++;
+			if (sfq_prob_mark(q)) {
+				/* We know we have at least one packet in queue */
+				if (sfq_headdrop(q) &&
+				    INET_ECN_set_ce(slot->skblist_next)) {
+					q->stats.prob_mark_head++;
+					break;
+				}
+				if (INET_ECN_set_ce(skb)) {
+					q->stats.prob_mark++;
+					break;
+				}
+			}
+			q->stats.prob_drop++;
+			goto congestion_drop;
+
+		case RED_HARD_MARK:
+			sch->qstats.overlimits++;
+			if (sfq_hard_mark(q)) {
+				/* We know we have at least one packet in queue */
+				if (sfq_headdrop(q) &&
+				    INET_ECN_set_ce(slot->skblist_next)) {
+					q->stats.forced_mark_head++;
+					break;
+				}
+				if (INET_ECN_set_ce(skb)) {
+					q->stats.forced_mark++;
+					break;
+				}
+			}
+			q->stats.forced_drop++;
+			goto congestion_drop;
+		}
+	}
 
-		if (!q->headdrop)
+	if (slot->qlen >= q->maxdepth) {
+congestion_drop:
+		if (!sfq_headdrop(q))
 			return qdisc_drop(skb, sch);
 
+		/* We know we have at least one packet in queue */
 		head = slot_dequeue_head(slot);
-		sch->qstats.backlog -= qdisc_pkt_len(head);
+		delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
+		sch->qstats.backlog -= delta;
+		slot->backlog -= delta;
 		qdisc_drop(head, sch);
 
-		sch->qstats.backlog += qdisc_pkt_len(skb);
 		slot_queue_add(slot, skb);
 		return NET_XMIT_CN;
 	}
 
+enqueue:
 	sch->qstats.backlog += qdisc_pkt_len(skb);
+	slot->backlog += qdisc_pkt_len(skb);
 	slot_queue_add(slot, skb);
 	sfq_inc(q, x);
 	if (slot->qlen == 1) {		/* The flow is new */
@@ -396,6 +475,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			slot->next = q->tail->next;
 			q->tail->next = x;
 		}
+		/* We could use a bigger initial quantum for new flows */
 		slot->allot = q->scaled_quantum;
 	}
 	if (++sch->q.qlen <= q->limit)
@@ -439,7 +519,7 @@ next_slot:
 	qdisc_bstats_update(sch, skb);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
-
+	slot->backlog -= qdisc_pkt_len(skb);
 	/* Is the slot empty? */
 	if (slot->qlen == 0) {
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
@@ -490,6 +570,8 @@ static void sfq_rehash(struct Qdisc *sch)
 			sfq_dec(q, i);
 			__skb_queue_tail(&list, skb);
 		}
+		slot->backlog = 0;
+		red_set_vars(&slot->vars);
 		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
 	}
 	q->tail = NULL;
@@ -514,6 +596,11 @@ drop:		sch->qstats.backlog -= qdisc_pkt_len(skb);
 		if (slot->qlen >= q->maxdepth)
 			goto drop;
 		slot_queue_add(slot, skb);
+		if (q->red_parms)
+			slot->vars.qavg = red_calc_qavg(q->red_parms,
+							&slot->vars,
+							slot->backlog);
+		slot->backlog += qdisc_pkt_len(skb);
 		sfq_inc(q, x);
 		if (slot->qlen == 1) {		/* The flow is new */
 			if (q->tail == NULL) {	/* It is the first flow */
@@ -552,6 +639,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_sfq_qopt *ctl = nla_data(opt);
 	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
 	unsigned int qlen;
+	struct red_parms *p = NULL;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
@@ -560,7 +648,11 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ctl->divisor &&
 	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
 		return -EINVAL;
-
+	if (ctl_v1 && ctl_v1->qth_min) {
+		p = kmalloc(sizeof(*p), GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+	}
 	sch_tree_lock(sch);
 	if (ctl->quantum) {
 		q->quantum = ctl->quantum;
@@ -576,6 +668,16 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 	if (ctl_v1) {
 		if (ctl_v1->depth)
 			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+		if (p) {
+			swap(q->red_parms, p);
+			red_set_parms(q->red_parms,
+				      ctl_v1->qth_min, ctl_v1->qth_max,
+				      ctl_v1->Wlog,
+				      ctl_v1->Plog, ctl_v1->Scell_log,
+				      NULL,
+				      ctl_v1->max_P);
+		}
+		q->flags = ctl_v1->flags;
 		q->headdrop = ctl_v1->headdrop;
 	}
 	if (ctl->limit) {
@@ -594,6 +696,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 		q->perturbation = net_random();
 	}
 	sch_tree_unlock(sch);
+	kfree(p);
 	return 0;
 }
 
@@ -625,6 +728,7 @@ static void sfq_destroy(struct Qdisc *sch)
 	del_timer_sync(&q->perturb_timer);
 	sfq_free(q->ht);
 	sfq_free(q->slots);
+	kfree(q->red_parms);
 }
 
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
@@ -683,6 +787,7 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
 	struct tc_sfq_qopt_v1 opt;
+	struct red_parms *p = q->red_parms;
 
 	memset(&opt, 0, sizeof(opt));
 	opt.v0.quantum	= q->quantum;
@@ -693,6 +798,17 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	opt.depth	= q->maxdepth;
 	opt.headdrop	= q->headdrop;
 
+	if (p) {
+		opt.qth_min	= p->qth_min >> p->Wlog;
+		opt.qth_max	= p->qth_max >> p->Wlog;
+		opt.Wlog	= p->Wlog;
+		opt.Plog	= p->Plog;
+		opt.Scell_log	= p->Scell_log;
+		opt.max_P	= p->max_P;
+	}
+	memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
+	opt.flags	= q->flags;
+
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
 	return skb->len;
@@ -747,15 +863,13 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	sfq_index idx = q->ht[cl - 1];
 	struct gnet_stats_queue qs = { 0 };
 	struct tc_sfq_xstats xstats = { 0 };
-	struct sk_buff *skb;
 
 	if (idx != SFQ_EMPTY_SLOT) {
 		const struct sfq_slot *slot = &q->slots[idx];
 
 		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
 		qs.qlen = slot->qlen;
-		slot_queue_walk(slot, skb)
-			qs.backlog += qdisc_pkt_len(skb);
+		qs.backlog = slot->backlog;
 	}
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
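
As a rough illustration of how the new fields in struct tc_sfq_qopt_v1 are meant to be driven from userspace: the structure is passed as the TCA_OPTIONS attribute of the qdisc netlink request, sfq_change() detects the v1 layout from the attribute length, and the RED path is only armed when qth_min is non-zero. The helper below is a minimal sketch under stated assumptions; the name sfqred_fill, the placeholder Wlog/Plog/Scell_log values, and the reading of max_P as the probability scaled by 2^32 are illustrative, not defined by this patch.

/* Minimal sketch: fill the SFQRED portion of tc_sfq_qopt_v1.
 * Assumptions (not defined by this patch): max_P carries the marking
 * probability scaled by 2^32 (valid for prob < 1.0), and the Wlog/Plog/
 * Scell_log values below are placeholders rather than derived ones.
 */
#include <linux/pkt_sched.h>
#include <string.h>

static void sfqred_fill(struct tc_sfq_qopt_v1 *opt,
			__u32 flow_limit,   /* hard per-flow limit, bytes */
			__u32 qth_min,      /* min average threshold, bytes */
			__u32 qth_max,      /* max average threshold, bytes */
			double prob,        /* marking probability at qth_max */
			int use_ecn)
{
	memset(opt, 0, sizeof(*opt));
	opt->v0.quantum = 1514;               /* per-round allotment, >= MTU */
	opt->v0.limit   = 3000;               /* total packets in the qdisc */
	opt->headdrop   = 1;                  /* drop/mark at the head of each flow */
	opt->limit      = flow_limit;
	opt->qth_min    = qth_min;            /* non-zero => RED path enabled */
	opt->qth_max    = qth_max;
	opt->Wlog       = 9;                  /* EWMA weight 1/512 (placeholder) */
	opt->Plog       = 11;                 /* placeholder, normally computed */
	opt->Scell_log  = 0;                  /* idle damping cell (placeholder) */
	opt->max_P      = (__u32)(prob * 4294967296.0);
	opt->flags      = use_ecn ? TC_RED_ECN : 0;
	/* Sent as TCA_OPTIONS of an RTM_NEWQDISC message; sfq_change()
	 * then passes these values to red_set_parms(). */
}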
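
The net effect of the new RED branch in sfq_enqueue() can also be read as a small policy table, modeled below independently of the kernel data structures (the enum and function names are invented for illustration): a RED_DONT_MARK verdict passes the packet, a probabilistic hit between the thresholds marks it when TC_RED_ECN is set and drops it otherwise, and above the max threshold it is marked only when TC_RED_ECN is set and TC_RED_HARDDROP is not. Marking prefers the head packet of the flow when headdrop is enabled and falls back to the arriving packet; if CE-marking fails, the congestion_drop path is taken.

/* Self-contained model of the per-packet outcome added to sfq_enqueue()
 * above.  "MARK" stands for CE-marking via INET_ECN_set_ce(); in the real
 * code marking can still fall back to a drop if neither the head packet
 * nor the arriving packet is ECN-capable.  Names are illustrative only.
 */
enum sfqred_verdict { SFQRED_PASS, SFQRED_MARK, SFQRED_DROP };

static enum sfqred_verdict sfqred_outcome(int action,    /* 0=DONT_MARK, 1=PROB_MARK, 2=HARD_MARK */
					  int ecn,        /* TC_RED_ECN set in flags */
					  int harddrop)   /* TC_RED_HARDDROP set in flags */
{
	switch (action) {
	case 0:	/* RED_DONT_MARK: no action this time */
		return SFQRED_PASS;
	case 1:	/* RED_PROB_MARK: probabilistic hit between min and max */
		return ecn ? SFQRED_MARK : SFQRED_DROP;
	case 2:	/* RED_HARD_MARK: average backlog above max threshold */
		return (ecn && !harddrop) ? SFQRED_MARK : SFQRED_DROP;
	default:
		return SFQRED_DROP;
	}
}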