author     Linus Torvalds <torvalds@linux-foundation.org>   2016-10-05 10:11:24 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>   2016-10-05 10:11:24 -0700
commit     687ee0ad4e897e29f4b41f7a20c866d74c5e0660 (patch)
tree       b31a2af35c24a54823674cdd126993b80daeac67 /lib
parent     3ddf40e8c31964b744ff10abb48c8e36a83ec6e7 (diff)
parent     03a1eabc3f54469abd4f1784182851b2e29630cc (diff)
download   linux-687ee0ad4e897e29f4b41f7a20c866d74c5e0660.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) BBR TCP congestion control, from Neal Cardwell, Yuchung Cheng and
co. at Google. https://lwn.net/Articles/701165/
2) Do TCP Small Queues for retransmits, from Eric Dumazet.
3) Support collect_md mode for all IPV4 and IPV6 tunnels, from Alexei
Starovoitov.
4) Allow cls_flower to classify packets in ip tunnels, from Amir Vadai.
5) Support DSA tagging in older mv88e6xxx switches, from Andrew Lunn.
6) Support GMAC protocol in iwlwifi mvm, from Ayala Beker.
7) Support ndo_poll_controller in mlx5, from Calvin Owens.
8) Move VRF processing to an output hook and allow l3mdev to be
loopback, from David Ahern.
9) Support SOCK_DESTROY for UDP sockets. Also from David Ahern.
10) Congestion control in RXRPC, from David Howells.
11) Support geneve RX offload in ixgbe, from Emil Tantilov.
12) When hitting pressure for new incoming TCP data SKBs, perform a
partial rather than a full purge of the OFO queue (which could be
huge). From Eric Dumazet.
13) Convert XFRM state and policy lookups to RCU, from Florian Westphal.
14) Support RX network flow classification to igb, from Gangfeng Huang.
15) Hardware offloading of eBPF in nfp driver, from Jakub Kicinski.
16) New skbmod packet action, from Jamal Hadi Salim.
17) Remove some inefficiencies in snmp proc output, from Jia He.
18) Add FIB notifications to properly propagate route changes to
hardware which is doing forwarding offloading. From Jiri Pirko.
19) New dsa driver for qca8xxx chips, from John Crispin.
20) Implement RFC7559 ipv6 router solicitation backoff, from Maciej
Żenczykowski.
21) Add L3 mode to ipvlan, from Mahesh Bandewar.
22) Support 802.1ad in mlx4, from Moshe Shemesh.
23) Support hardware LRO in mediatek driver, from Nelson Chang.
24) Add TC offloading to mlx5, from Or Gerlitz.
25) Convert various drivers to ethtool ksettings interfaces, from
Philippe Reynes.
26) TX max rate limiting for cxgb4, from Rahul Lakkireddy.
27) NAPI support for ath10k, from Rajkumar Manoharan.
28) Support XDP in mlx5, from Rana Shahout and Saeed Mahameed.
29) UDP replicast support in TIPC, from Richard Alpe.
30) Per-queue statistics for qed driver, from Sudarsana Reddy Kalluru.
31) Support BQL in thunderx driver, from Sunil Goutham.
32) TSO support in alx driver, from Tobias Regnery.
33) Add stream parser engine and use it in kcm.
34) Support async DHCP replies in ipconfig module, from Uwe
Kleine-König.
35) DSA port fast aging for mv88e6xxx driver, from Vivien Didelot.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1715 commits)
mlxsw: switchx2: Fix misuse of hard_header_len
mlxsw: spectrum: Fix misuse of hard_header_len
net/faraday: Stop NCSI device on shutdown
net/ncsi: Introduce ncsi_stop_dev()
net/ncsi: Rework the channel monitoring
net/ncsi: Allow to extend NCSI request properties
net/ncsi: Rework request index allocation
net/ncsi: Don't probe on the reserved channel ID (0x1f)
net/ncsi: Introduce NCSI_RESERVED_CHANNEL
net/ncsi: Avoid unused-value build warning from ia64-linux-gcc
net: Add netdev all_adj_list refcnt propagation to fix panic
net: phy: Add Edge-rate driver for Microsemi PHYs.
vmxnet3: Wake queue from reset work
i40e: avoid NULL pointer dereference and recursive errors on early PCI error
qed: Add RoCE ll2 & GSI support
qed: Add support for memory registeration verbs
qed: Add support for QP verbs
qed: PD,PKEY and CQ verb support
qed: Add support for RoCE hw init
qede: Add qedr framework
...
Diffstat (limited to 'lib')
-rw-r--r--   lib/Makefile     |   2
-rw-r--r--   lib/random32.c   |   4
-rw-r--r--   lib/rhashtable.c | 300
-rw-r--r--   lib/test_bpf.c   |   1
-rw-r--r--   lib/win_minmax.c |  98
5 files changed, 326 insertions, 79 deletions
diff --git a/lib/Makefile b/lib/Makefile
index 5dc77a8ec297..df747e5eeb7a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -22,7 +22,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
	 sha1.o chacha20.o md5.o irq_regs.o argv_split.o \
	 flex_proportions.o ratelimit.o show_mem.o \
	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o
+	 earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o

 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o

diff --git a/lib/random32.c b/lib/random32.c
index 69ed593aab07..915982b304bb 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -81,7 +81,7 @@ u32 prandom_u32(void)
 	u32 res;

 	res = prandom_u32_state(state);
-	put_cpu_var(state);
+	put_cpu_var(net_rand_state);

 	return res;
 }
@@ -128,7 +128,7 @@ void prandom_bytes(void *buf, size_t bytes)
 	struct rnd_state *state = &get_cpu_var(net_rand_state);

 	prandom_bytes_state(state, buf, bytes);
-	put_cpu_var(state);
+	put_cpu_var(net_rand_state);
 }
 EXPORT_SYMBOL(prandom_bytes);

diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 56054e541a0f..32d0ad058380 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -378,22 +378,8 @@ static void rht_deferred_worker(struct work_struct *work)
 		schedule_work(&ht->run_work);
 }

-static bool rhashtable_check_elasticity(struct rhashtable *ht,
-					struct bucket_table *tbl,
-					unsigned int hash)
-{
-	unsigned int elasticity = ht->elasticity;
-	struct rhash_head *head;
-
-	rht_for_each(head, tbl, hash)
-		if (!--elasticity)
-			return true;
-
-	return false;
-}
-
-int rhashtable_insert_rehash(struct rhashtable *ht,
-			     struct bucket_table *tbl)
+static int rhashtable_insert_rehash(struct rhashtable *ht,
+				    struct bucket_table *tbl)
 {
 	struct bucket_table *old_tbl;
 	struct bucket_table *new_tbl;
@@ -439,61 +425,172 @@ fail:

 	return err;
 }
-EXPORT_SYMBOL_GPL(rhashtable_insert_rehash);

-struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
-					    const void *key,
-					    struct rhash_head *obj,
-					    struct bucket_table *tbl)
+static void *rhashtable_lookup_one(struct rhashtable *ht,
+				   struct bucket_table *tbl, unsigned int hash,
+				   const void *key, struct rhash_head *obj)
 {
+	struct rhashtable_compare_arg arg = {
+		.ht = ht,
+		.key = key,
+	};
+	struct rhash_head __rcu **pprev;
 	struct rhash_head *head;
-	unsigned int hash;
-	int err;
+	int elasticity;

-	tbl = rhashtable_last_table(ht, tbl);
-	hash = head_hashfn(ht, tbl, obj);
-	spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING);
+	elasticity = ht->elasticity;
+	pprev = &tbl->buckets[hash];
+	rht_for_each(head, tbl, hash) {
+		struct rhlist_head *list;
+		struct rhlist_head *plist;

-	err = -EEXIST;
-	if (key && rhashtable_lookup_fast(ht, key, ht->p))
-		goto exit;
+		elasticity--;
+		if (!key ||
+		    (ht->p.obj_cmpfn ?
+		     ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
+		     rhashtable_compare(&arg, rht_obj(ht, head))))
+			continue;

-	err = -E2BIG;
-	if (unlikely(rht_grow_above_max(ht, tbl)))
-		goto exit;
+		if (!ht->rhlist)
+			return rht_obj(ht, head);
+
+		list = container_of(obj, struct rhlist_head, rhead);
+		plist = container_of(head, struct rhlist_head, rhead);
+
+		RCU_INIT_POINTER(list->next, plist);
+		head = rht_dereference_bucket(head->next, tbl, hash);
+		RCU_INIT_POINTER(list->rhead.next, head);
+		rcu_assign_pointer(*pprev, obj);
+
+		return NULL;
+	}
+
+	if (elasticity <= 0)
+		return ERR_PTR(-EAGAIN);
+
+	return ERR_PTR(-ENOENT);
+}
+
+static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht,
+						  struct bucket_table *tbl,
+						  unsigned int hash,
+						  struct rhash_head *obj,
+						  void *data)
+{
+	struct bucket_table *new_tbl;
+	struct rhash_head *head;
+
+	if (!IS_ERR_OR_NULL(data))
+		return ERR_PTR(-EEXIST);
+
+	if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT)
+		return ERR_CAST(data);

-	err = -EAGAIN;
-	if (rhashtable_check_elasticity(ht, tbl, hash) ||
-	    rht_grow_above_100(ht, tbl))
-		goto exit;
+	new_tbl = rcu_dereference(tbl->future_tbl);
+	if (new_tbl)
+		return new_tbl;

-	err = 0;
+	if (PTR_ERR(data) != -ENOENT)
+		return ERR_CAST(data);
+
+	if (unlikely(rht_grow_above_max(ht, tbl)))
+		return ERR_PTR(-E2BIG);
+
+	if (unlikely(rht_grow_above_100(ht, tbl)))
+		return ERR_PTR(-EAGAIN);

 	head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);

 	RCU_INIT_POINTER(obj->next, head);
+	if (ht->rhlist) {
+		struct rhlist_head *list;
+
+		list = container_of(obj, struct rhlist_head, rhead);
+		RCU_INIT_POINTER(list->next, NULL);
+	}

 	rcu_assign_pointer(tbl->buckets[hash], obj);

 	atomic_inc(&ht->nelems);
+	if (rht_grow_above_75(ht, tbl))
+		schedule_work(&ht->run_work);

-exit:
-	spin_unlock(rht_bucket_lock(tbl, hash));
+	return NULL;
+}

-	if (err == 0)
-		return NULL;
-	else if (err == -EAGAIN)
-		return tbl;
-	else
-		return ERR_PTR(err);
+static void *rhashtable_try_insert(struct rhashtable *ht, const void *key,
+				   struct rhash_head *obj)
+{
+	struct bucket_table *new_tbl;
+	struct bucket_table *tbl;
+	unsigned int hash;
+	spinlock_t *lock;
+	void *data;
+
+	tbl = rcu_dereference(ht->tbl);
+
+	/* All insertions must grab the oldest table containing
+	 * the hashed bucket that is yet to be rehashed.
+	 */
+	for (;;) {
+		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
+		lock = rht_bucket_lock(tbl, hash);
+		spin_lock_bh(lock);
+
+		if (tbl->rehash <= hash)
+			break;
+
+		spin_unlock_bh(lock);
+		tbl = rcu_dereference(tbl->future_tbl);
+	}
+
+	data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
+	new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
+	if (PTR_ERR(new_tbl) != -EEXIST)
+		data = ERR_CAST(new_tbl);
+
+	while (!IS_ERR_OR_NULL(new_tbl)) {
+		tbl = new_tbl;
+		hash = rht_head_hashfn(ht, tbl, obj, ht->p);
+		spin_lock_nested(rht_bucket_lock(tbl, hash),
+				 SINGLE_DEPTH_NESTING);
+
+		data = rhashtable_lookup_one(ht, tbl, hash, key, obj);
+		new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data);
+		if (PTR_ERR(new_tbl) != -EEXIST)
+			data = ERR_CAST(new_tbl);
+
+		spin_unlock(rht_bucket_lock(tbl, hash));
+	}
+
+	spin_unlock_bh(lock);
+
+	if (PTR_ERR(data) == -EAGAIN)
+		data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?:
+			       -EAGAIN);
+
+	return data;
+}
+
+void *rhashtable_insert_slow(struct rhashtable *ht, const void *key,
+			     struct rhash_head *obj)
+{
+	void *data;
+
+	do {
+		rcu_read_lock();
+		data = rhashtable_try_insert(ht, key, obj);
+		rcu_read_unlock();
+	} while (PTR_ERR(data) == -EAGAIN);
+
+	return data;
 }
 EXPORT_SYMBOL_GPL(rhashtable_insert_slow);

 /**
- * rhashtable_walk_init - Initialise an iterator
+ * rhashtable_walk_enter - Initialise an iterator
  * @ht:		Table to walk over
  * @iter:	Hash table Iterator
- * @gfp:	GFP flags for allocations
  *
  * This function prepares a hash table walk.
  *
@@ -508,30 +605,22 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow);
  * This function may sleep so you must not call it from interrupt
  * context or with spin locks held.
  *
- * You must call rhashtable_walk_exit if this function returns
- * successfully.
+ * You must call rhashtable_walk_exit after this function returns.
  */
-int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter,
-			 gfp_t gfp)
+void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
 {
 	iter->ht = ht;
 	iter->p = NULL;
 	iter->slot = 0;
 	iter->skip = 0;

-	iter->walker = kmalloc(sizeof(*iter->walker), gfp);
-	if (!iter->walker)
-		return -ENOMEM;
-
 	spin_lock(&ht->lock);
-	iter->walker->tbl =
+	iter->walker.tbl =
 		rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock));
-	list_add(&iter->walker->list, &iter->walker->tbl->walkers);
+	list_add(&iter->walker.list, &iter->walker.tbl->walkers);
 	spin_unlock(&ht->lock);
-
-	return 0;
 }
-EXPORT_SYMBOL_GPL(rhashtable_walk_init);
+EXPORT_SYMBOL_GPL(rhashtable_walk_enter);

 /**
  * rhashtable_walk_exit - Free an iterator
@@ -542,10 +631,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init);
 void rhashtable_walk_exit(struct rhashtable_iter *iter)
 {
 	spin_lock(&iter->ht->lock);
-	if (iter->walker->tbl)
-		list_del(&iter->walker->list);
+	if (iter->walker.tbl)
+		list_del(&iter->walker.list);
 	spin_unlock(&iter->ht->lock);
-	kfree(iter->walker);
 }
 EXPORT_SYMBOL_GPL(rhashtable_walk_exit);

@@ -571,12 +659,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter)
 	rcu_read_lock();

 	spin_lock(&ht->lock);
-	if (iter->walker->tbl)
-		list_del(&iter->walker->list);
+	if (iter->walker.tbl)
+		list_del(&iter->walker.list);
 	spin_unlock(&ht->lock);

-	if (!iter->walker->tbl) {
-		iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht);
+	if (!iter->walker.tbl) {
+		iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht);
 		return -EAGAIN;
 	}

@@ -598,12 +686,17 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start);
  */
 void *rhashtable_walk_next(struct rhashtable_iter *iter)
 {
-	struct bucket_table *tbl = iter->walker->tbl;
+	struct bucket_table *tbl = iter->walker.tbl;
+	struct rhlist_head *list = iter->list;
 	struct rhashtable *ht = iter->ht;
 	struct rhash_head *p = iter->p;
+	bool rhlist = ht->rhlist;

 	if (p) {
-		p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot);
+		if (!rhlist || !(list = rcu_dereference(list->next))) {
+			p = rcu_dereference(p->next);
+			list = container_of(p, struct rhlist_head, rhead);
+		}
 		goto next;
 	}

@@ -611,6 +704,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter)
 		int skip = iter->skip;

 		rht_for_each_rcu(p, tbl, iter->slot) {
+			if (rhlist) {
+				list = container_of(p, struct rhlist_head,
+						    rhead);
+				do {
+					if (!skip)
+						goto next;
+					skip--;
+					list = rcu_dereference(list->next);
+				} while (list);
+
+				continue;
+			}
 			if (!skip)
 				break;
 			skip--;
@@ -620,7 +725,8 @@ next:
 		if (!rht_is_a_nulls(p)) {
 			iter->skip++;
 			iter->p = p;
-			return rht_obj(ht, p);
+			iter->list = list;
+			return rht_obj(ht, rhlist ? &list->rhead : p);
 		}

 		iter->skip = 0;
@@ -631,8 +737,8 @@ next:
 	/* Ensure we see any new tables. */
 	smp_rmb();

-	iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht);
-	if (iter->walker->tbl) {
+	iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+	if (iter->walker.tbl) {
 		iter->slot = 0;
 		iter->skip = 0;
 		return ERR_PTR(-EAGAIN);
@@ -652,7 +758,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)
 	__releases(RCU)
 {
 	struct rhashtable *ht;
-	struct bucket_table *tbl = iter->walker->tbl;
+	struct bucket_table *tbl = iter->walker.tbl;

 	if (!tbl)
 		goto out;
@@ -661,9 +767,9 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter)

 	spin_lock(&ht->lock);
 	if (tbl->rehash < tbl->size)
-		list_add(&iter->walker->list, &tbl->walkers);
+		list_add(&iter->walker.list, &tbl->walkers);
 	else
-		iter->walker->tbl = NULL;
+		iter->walker.tbl = NULL;
 	spin_unlock(&ht->lock);

 	iter->p = NULL;
@@ -809,6 +915,48 @@ int rhashtable_init(struct rhashtable *ht,
 EXPORT_SYMBOL_GPL(rhashtable_init);

 /**
+ * rhltable_init - initialize a new hash list table
+ * @hlt:	hash list table to be initialized
+ * @params:	configuration parameters
+ *
+ * Initializes a new hash list table.
+ *
+ * See documentation for rhashtable_init.
+ */
+int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params)
+{
+	int err;
+
+	/* No rhlist NULLs marking for now. */
+	if (params->nulls_base)
+		return -EINVAL;
+
+	err = rhashtable_init(&hlt->ht, params);
+	hlt->ht.rhlist = true;
+	return err;
+}
+EXPORT_SYMBOL_GPL(rhltable_init);
+
+static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj,
+				void (*free_fn)(void *ptr, void *arg),
+				void *arg)
+{
+	struct rhlist_head *list;
+
+	if (!ht->rhlist) {
+		free_fn(rht_obj(ht, obj), arg);
+		return;
+	}
+
+	list = container_of(obj, struct rhlist_head, rhead);
+	do {
+		obj = &list->rhead;
+		list = rht_dereference(list->next, ht);
+		free_fn(rht_obj(ht, obj), arg);
+	} while (list);
+}
+
+/**
  * rhashtable_free_and_destroy - free elements and destroy hash table
  * @ht: the hash table to destroy
  * @free_fn: callback to release resources of element
@@ -845,7 +993,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 			     pos = next,
 			     next = !rht_is_a_nulls(pos) ?
 				rht_dereference(pos->next, ht) : NULL)
-				free_fn(rht_obj(ht, pos), arg);
+				rhashtable_free_one(ht, pos, free_fn, arg);
 		}
 	}

diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 93f45011a59d..94346b4d8984 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -5485,6 +5485,7 @@ static struct sk_buff *populate_skb(char *buf, int size)
 	skb->hash = SKB_HASH;
 	skb->queue_mapping = SKB_QUEUE_MAP;
 	skb->vlan_tci = SKB_VLAN_TCI;
+	skb->vlan_proto = htons(ETH_P_IP);
 	skb->dev = &dev;
 	skb->dev->ifindex = SKB_DEV_IFINDEX;
 	skb->dev->type = SKB_DEV_TYPE;
diff --git a/lib/win_minmax.c b/lib/win_minmax.c
new file mode 100644
index 000000000000..c8420d404926
--- /dev/null
+++ b/lib/win_minmax.c
@@ -0,0 +1,98 @@
+/**
+ * lib/minmax.c: windowed min/max tracker
+ *
+ * Kathleen Nichols' algorithm for tracking the minimum (or maximum)
+ * value of a data stream over some fixed time interval. (E.g.,
+ * the minimum RTT over the past five minutes.) It uses constant
+ * space and constant time per update yet almost always delivers
+ * the same minimum as an implementation that has to keep all the
+ * data in the window.
+ *
+ * The algorithm keeps track of the best, 2nd best & 3rd best min
+ * values, maintaining an invariant that the measurement time of
+ * the n'th best >= n-1'th best. It also makes sure that the three
+ * values are widely separated in the time window since that bounds
+ * the worse case error when that data is monotonically increasing
+ * over the window.
+ *
+ * Upon getting a new min, we can forget everything earlier because
+ * it has no value - the new min is <= everything else in the window
+ * by definition and it's the most recent. So we restart fresh on
+ * every new min and overwrites 2nd & 3rd choices. The same property
+ * holds for 2nd & 3rd best.
+ */
+#include <linux/module.h>
+#include <linux/win_minmax.h>
+
+/* As time advances, update the 1st, 2nd, and 3rd choices. */
+static u32 minmax_subwin_update(struct minmax *m, u32 win,
+				const struct minmax_sample *val)
+{
+	u32 dt = val->t - m->s[0].t;
+
+	if (unlikely(dt > win)) {
+		/*
+		 * Passed entire window without a new val so make 2nd
+		 * choice the new val & 3rd choice the new 2nd choice.
+		 * we may have to iterate this since our 2nd choice
+		 * may also be outside the window (we checked on entry
+		 * that the third choice was in the window).
+		 */
+		m->s[0] = m->s[1];
+		m->s[1] = m->s[2];
+		m->s[2] = *val;
+		if (unlikely(val->t - m->s[0].t > win)) {
+			m->s[0] = m->s[1];
+			m->s[1] = m->s[2];
+			m->s[2] = *val;
+		}
+	} else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) {
+		/*
+		 * We've passed a quarter of the window without a new val
+		 * so take a 2nd choice from the 2nd quarter of the window.
+		 */
+		m->s[2] = m->s[1] = *val;
+	} else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) {
+		/*
+		 * We've passed half the window without finding a new val
+		 * so take a 3rd choice from the last half of the window
+		 */
+		m->s[2] = *val;
+	}
+	return m->s[0].v;
+}
+
+/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */
+u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas)
+{
+	struct minmax_sample val = { .t = t, .v = meas };
+
+	if (unlikely(val.v >= m->s[0].v) ||	  /* found new max? */
+	    unlikely(val.t - m->s[2].t > win))	  /* nothing left in window? */
+		return minmax_reset(m, t, meas);  /* forget earlier samples */
+
+	if (unlikely(val.v >= m->s[1].v))
+		m->s[2] = m->s[1] = val;
+	else if (unlikely(val.v >= m->s[2].v))
+		m->s[2] = val;
+
+	return minmax_subwin_update(m, win, &val);
+}
+EXPORT_SYMBOL(minmax_running_max);
+
+/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */
+u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas)
+{
+	struct minmax_sample val = { .t = t, .v = meas };
+
+	if (unlikely(val.v <= m->s[0].v) ||	  /* found new min? */
+	    unlikely(val.t - m->s[2].t > win))	  /* nothing left in window? */
+		return minmax_reset(m, t, meas);  /* forget earlier samples */
+
+	if (unlikely(val.v <= m->s[1].v))
+		m->s[2] = m->s[1] = val;
+	else if (unlikely(val.v <= m->s[2].v))
+		m->s[2] = val;
+
+	return minmax_subwin_update(m, win, &val);
+}
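A note on the rhashtable walk API visible in the diff above: rhashtable_walk_init(), which took GFP flags and could fail with -ENOMEM, is replaced by rhashtable_walk_enter(), which returns void because the walker is now embedded in the iterator rather than allocated separately. A minimal usage sketch of the reworked walk sequence follows; struct my_obj, its fields and dump_table() are hypothetical names used only for illustration, while the rhashtable_walk_* calls themselves are the ones shown in the diff.

	/* Hypothetical object type; only the rhashtable_walk_* calls come from the diff. */
	struct my_obj {
		int id;
		struct rhash_head node;
	};

	static void dump_table(struct rhashtable *ht)
	{
		struct rhashtable_iter iter;
		struct my_obj *obj;

		rhashtable_walk_enter(ht, &iter);	/* can no longer fail */

		rhashtable_walk_start(&iter);		/* takes the RCU read lock */
		while ((obj = rhashtable_walk_next(&iter)) != NULL) {
			if (IS_ERR(obj)) {
				if (PTR_ERR(obj) == -EAGAIN)
					continue;	/* a resize happened; keep walking */
				break;			/* any other error: stop */
			}
			pr_info("id=%d\n", obj->id);
		}
		rhashtable_walk_stop(&iter);

		rhashtable_walk_exit(&iter);
	}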
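Similarly, for the new windowed min/max tracker added as lib/win_minmax.c: struct minmax, minmax_running_min() and minmax_reset() are taken straight from the code above; everything else in this sketch (the 10-second window, the jiffies-based clock, the function and variable names) is an illustrative assumption about how a caller might feed RTT samples to it. minmax_reset() can be used to restart the tracker from a known first sample.

	#include <linux/jiffies.h>
	#include <linux/win_minmax.h>

	static struct minmax rtt_min;	/* hypothetical per-connection tracker */

	/* Fold one RTT sample into the tracker and return the minimum RTT
	 * observed over the last 10 seconds. The time base and units are the
	 * caller's choice, as long as they are used consistently.
	 */
	static u32 record_rtt_sample(u32 rtt_us)
	{
		u32 now = jiffies_to_msecs(jiffies);
		u32 win = 10 * MSEC_PER_SEC;

		return minmax_running_min(&rtt_min, win, now, rtt_us);
	}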