diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-22 10:15:09 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-22 10:15:09 -0800 |
commit | 3051bf36c25d5153051704291782f8d44e744d36 (patch) | |
tree | 72dfc8a1d12675c6f2981d13102df954b678f11b /net/ipv4/ipmr.c | |
parent | 1e74a2eb1f5cc7f2f2b5aa9c9eeecbcf352220a3 (diff) | |
parent | 005c3490e9db23738d91e02788606c0fe4734723 (diff) | |
download | linux-3051bf36c25d5153051704291782f8d44e744d36.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Highlights:
1) Support TX_RING in AF_PACKET TPACKET_V3 mode, from Sowmini
Varadhan.
2) Simplify classifier state on sk_buff in order to shrink it a bit.
From Willem de Bruijn.
3) Introduce SIPHASH and it's usage for secure sequence numbers and
syncookies. From Jason A. Donenfeld.
4) Reduce CPU usage for ICMP replies we are going to limit or
suppress, from Jesper Dangaard Brouer.
5) Introduce Shared Memory Communications socket layer, from Ursula
Braun.
6) Add RACK loss detection and allow it to actually trigger fast
recovery instead of just assisting after other algorithms have
triggered it. From Yuchung Cheng.
7) Add xmit_more and BQL support to mvneta driver, from Simon Guinot.
8) skb_cow_data avoidance in esp4 and esp6, from Steffen Klassert.
9) Export MPLS packet stats via netlink, from Robert Shearman.
10) Significantly improve inet port bind conflict handling, especially
when an application is restarted and changes it's setting of
reuseport. From Josef Bacik.
11) Implement TX batching in vhost_net, from Jason Wang.
12) Extend the dummy device so that VF (virtual function) features,
such as configuration, can be more easily tested. From Phil
Sutter.
13) Avoid two atomic ops per page on x86 in bnx2x driver, from Eric
Dumazet.
14) Add new bpf MAP, implementing a longest prefix match trie. From
Daniel Mack.
15) Packet sample offloading support in mlxsw driver, from Yotam Gigi.
16) Add new aquantia driver, from David VomLehn.
17) Add bpf tracepoints, from Daniel Borkmann.
18) Add support for port mirroring to b53 and bcm_sf2 drivers, from
Florian Fainelli.
19) Remove custom busy polling in many drivers, it is done in the core
networking since 4.5 times. From Eric Dumazet.
20) Support XDP adjust_head in virtio_net, from John Fastabend.
21) Fix several major holes in neighbour entry confirmation, from
Julian Anastasov.
22) Add XDP support to bnxt_en driver, from Michael Chan.
23) VXLAN offloads for enic driver, from Govindarajulu Varadarajan.
24) Add IPVTAP driver (IP-VLAN based tap driver) from Sainath Grandhi.
25) Support GRO in IPSEC protocols, from Steffen Klassert"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1764 commits)
Revert "ath10k: Search SMBIOS for OEM board file extension"
net: socket: fix recvmmsg not returning error from sock_error
bnxt_en: use eth_hw_addr_random()
bpf: fix unlocking of jited image when module ronx not set
arch: add ARCH_HAS_SET_MEMORY config
net: napi_watchdog() can use napi_schedule_irqoff()
tcp: Revert "tcp: tcp_probe: use spin_lock_bh()"
net/hsr: use eth_hw_addr_random()
net: mvpp2: enable building on 64-bit platforms
net: mvpp2: switch to build_skb() in the RX path
net: mvpp2: simplify MVPP2_PRS_RI_* definitions
net: mvpp2: fix indentation of MVPP2_EXT_GLOBAL_CTRL_DEFAULT
net: mvpp2: remove unused register definitions
net: mvpp2: simplify mvpp2_bm_bufs_add()
net: mvpp2: drop useless fields in mvpp2_bm_pool and related code
net: mvpp2: remove unused 'tx_skb' field of 'struct mvpp2_tx_queue'
net: mvpp2: release reference to txq_cpu[] entry after unmapping
net: mvpp2: handle too large value in mvpp2_rx_time_coal_set()
net: mvpp2: handle too large value handling in mvpp2_rx_pkts_coal_set()
net: mvpp2: remove useless arguments in mvpp2_rx_{pkts, time}_coal_set
...
Diffstat (limited to 'net/ipv4/ipmr.c')
-rw-r--r-- | net/ipv4/ipmr.c | 266 |
1 files changed, 144 insertions, 122 deletions
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index efc1e76d4977..beacd028848c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -299,10 +299,29 @@ static void __net_exit ipmr_rules_exit(struct net *net) } #endif +static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct mfc_cache_cmp_arg *cmparg = arg->key; + struct mfc_cache *c = (struct mfc_cache *)ptr; + + return cmparg->mfc_mcastgrp != c->mfc_mcastgrp || + cmparg->mfc_origin != c->mfc_origin; +} + +static const struct rhashtable_params ipmr_rht_params = { + .head_offset = offsetof(struct mfc_cache, mnode), + .key_offset = offsetof(struct mfc_cache, cmparg), + .key_len = sizeof(struct mfc_cache_cmp_arg), + .nelem_hint = 3, + .locks_mul = 1, + .obj_cmpfn = ipmr_hash_cmp, + .automatic_shrinking = true, +}; + static struct mr_table *ipmr_new_table(struct net *net, u32 id) { struct mr_table *mrt; - unsigned int i; /* "pimreg%u" should not exceed 16 bytes (IFNAMSIZ) */ if (id != RT_TABLE_DEFAULT && id >= 1000000000) @@ -318,10 +337,8 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) write_pnet(&mrt->net, net); mrt->id = id; - /* Forwarding cache */ - for (i = 0; i < MFC_LINES; i++) - INIT_LIST_HEAD(&mrt->mfc_cache_array[i]); - + rhltable_init(&mrt->mfc_hash, &ipmr_rht_params); + INIT_LIST_HEAD(&mrt->mfc_cache_list); INIT_LIST_HEAD(&mrt->mfc_unres_queue); setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process, @@ -338,6 +355,7 @@ static void ipmr_free_table(struct mr_table *mrt) { del_timer_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, true); + rhltable_destroy(&mrt->mfc_hash); kfree(mrt); } @@ -839,13 +857,17 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, __be32 origin, __be32 mcastgrp) { - int line = MFC_HASH(mcastgrp, origin); + struct mfc_cache_cmp_arg arg = { + .mfc_mcastgrp = mcastgrp, + .mfc_origin = origin + }; + struct rhlist_head *tmp, *list; struct mfc_cache *c; - list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) { - if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp) - return c; - } + list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); + rhl_for_each_entry_rcu(c, tmp, list, mnode) + return c; + return NULL; } @@ -853,13 +875,16 @@ static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt, static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, int vifi) { - int line = MFC_HASH(htonl(INADDR_ANY), htonl(INADDR_ANY)); + struct mfc_cache_cmp_arg arg = { + .mfc_mcastgrp = htonl(INADDR_ANY), + .mfc_origin = htonl(INADDR_ANY) + }; + struct rhlist_head *tmp, *list; struct mfc_cache *c; - list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) - if (c->mfc_origin == htonl(INADDR_ANY) && - c->mfc_mcastgrp == htonl(INADDR_ANY) && - c->mfc_un.res.ttls[vifi] < 255) + list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); + rhl_for_each_entry_rcu(c, tmp, list, mnode) + if (c->mfc_un.res.ttls[vifi] < 255) return c; return NULL; @@ -869,29 +894,51 @@ static struct mfc_cache *ipmr_cache_find_any_parent(struct mr_table *mrt, static struct mfc_cache *ipmr_cache_find_any(struct mr_table *mrt, __be32 mcastgrp, int vifi) { - int line = MFC_HASH(mcastgrp, htonl(INADDR_ANY)); + struct mfc_cache_cmp_arg arg = { + .mfc_mcastgrp = mcastgrp, + .mfc_origin = htonl(INADDR_ANY) + }; + struct rhlist_head *tmp, *list; struct mfc_cache *c, *proxy; if (mcastgrp == htonl(INADDR_ANY)) goto skip; - list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) - if (c->mfc_origin == htonl(INADDR_ANY) && - c->mfc_mcastgrp == mcastgrp) { - if (c->mfc_un.res.ttls[vifi] < 255) - return c; - - /* It's ok if the vifi is part of the static tree */ - proxy = ipmr_cache_find_any_parent(mrt, - c->mfc_parent); - if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) - return c; - } + list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); + rhl_for_each_entry_rcu(c, tmp, list, mnode) { + if (c->mfc_un.res.ttls[vifi] < 255) + return c; + + /* It's ok if the vifi is part of the static tree */ + proxy = ipmr_cache_find_any_parent(mrt, c->mfc_parent); + if (proxy && proxy->mfc_un.res.ttls[vifi] < 255) + return c; + } skip: return ipmr_cache_find_any_parent(mrt, vifi); } +/* Look for a (S,G,iif) entry if parent != -1 */ +static struct mfc_cache *ipmr_cache_find_parent(struct mr_table *mrt, + __be32 origin, __be32 mcastgrp, + int parent) +{ + struct mfc_cache_cmp_arg arg = { + .mfc_mcastgrp = mcastgrp, + .mfc_origin = origin, + }; + struct rhlist_head *tmp, *list; + struct mfc_cache *c; + + list = rhltable_lookup(&mrt->mfc_hash, &arg, ipmr_rht_params); + rhl_for_each_entry_rcu(c, tmp, list, mnode) + if (parent == -1 || parent == c->mfc_parent) + return c; + + return NULL; +} + /* Allocate a multicast cache entry */ static struct mfc_cache *ipmr_cache_alloc(void) { @@ -1028,10 +1075,10 @@ static int ipmr_cache_report(struct mr_table *mrt, static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb) { + const struct iphdr *iph = ip_hdr(skb); + struct mfc_cache *c; bool found = false; int err; - struct mfc_cache *c; - const struct iphdr *iph = ip_hdr(skb); spin_lock_bh(&mfc_unres_lock); list_for_each_entry(c, &mrt->mfc_unres_queue, list) { @@ -1095,46 +1142,39 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { - int line; - struct mfc_cache *c, *next; + struct mfc_cache *c; - line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); + /* The entries are added/deleted only under RTNL */ + rcu_read_lock(); + c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, + mfc->mfcc_mcastgrp.s_addr, parent); + rcu_read_unlock(); + if (!c) + return -ENOENT; + rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); + list_del_rcu(&c->list); + mroute_netlink_event(mrt, c, RTM_DELROUTE); + ipmr_cache_free(c); - list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) { - if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && - (parent == -1 || parent == c->mfc_parent)) { - list_del_rcu(&c->list); - mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_cache_free(c); - return 0; - } - } - return -ENOENT; + return 0; } static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, struct mfcctl *mfc, int mrtsock, int parent) { - bool found = false; - int line; struct mfc_cache *uc, *c; + bool found; + int ret; if (mfc->mfcc_parent >= MAXVIFS) return -ENFILE; - line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); - - list_for_each_entry(c, &mrt->mfc_cache_array[line], list) { - if (c->mfc_origin == mfc->mfcc_origin.s_addr && - c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr && - (parent == -1 || parent == c->mfc_parent)) { - found = true; - break; - } - } - - if (found) { + /* The entries are added/deleted only under RTNL */ + rcu_read_lock(); + c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, + mfc->mfcc_mcastgrp.s_addr, parent); + rcu_read_unlock(); + if (c) { write_lock_bh(&mrt_lock); c->mfc_parent = mfc->mfcc_parent; ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls); @@ -1160,8 +1200,14 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, if (!mrtsock) c->mfc_flags |= MFC_STATIC; - list_add_rcu(&c->list, &mrt->mfc_cache_array[line]); - + ret = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->mnode, + ipmr_rht_params); + if (ret) { + pr_err("ipmr: rhtable insert error %d\n", ret); + ipmr_cache_free(c); + return ret; + } + list_add_tail_rcu(&c->list, &mrt->mfc_cache_list); /* Check to see if we resolved a queued list. If so we * need to send on the frames and tidy up. */ @@ -1191,9 +1237,9 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, /* Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt, bool all) { - int i; + struct mfc_cache *c, *tmp; LIST_HEAD(list); - struct mfc_cache *c, *next; + int i; /* Shut down all active vif entries */ for (i = 0; i < mrt->maxvif; i++) { @@ -1204,19 +1250,18 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) unregister_netdevice_many(&list); /* Wipe the cache */ - for (i = 0; i < MFC_LINES; i++) { - list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) { - if (!all && (c->mfc_flags & MFC_STATIC)) - continue; - list_del_rcu(&c->list); - mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_cache_free(c); - } + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { + if (!all && (c->mfc_flags & MFC_STATIC)) + continue; + rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); + list_del_rcu(&c->list); + mroute_netlink_event(mrt, c, RTM_DELROUTE); + ipmr_cache_free(c); } if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { spin_lock_bh(&mfc_unres_lock); - list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) { + list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { list_del(&c->list); mroute_netlink_event(mrt, c, RTM_DELROUTE); ipmr_destroy_unres(mrt, c); @@ -1791,9 +1836,9 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *cache, int local) { + int true_vifi = ipmr_find_vif(mrt, skb->dev); int psend = -1; int vif, ct; - int true_vifi = ipmr_find_vif(mrt, skb->dev); vif = cache->mfc_parent; cache->mfc_un.res.pkt++; @@ -2091,8 +2136,10 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int ct; /* If cache is unresolved, don't try to parse IIF and OIF */ - if (c->mfc_parent >= MAXVIFS) + if (c->mfc_parent >= MAXVIFS) { + rtm->rtm_flags |= RTNH_F_UNRESOLVED; return -ENOENT; + } if (VIF_EXISTS(mrt, c->mfc_parent) && nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) @@ -2134,7 +2181,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, - struct rtmsg *rtm, int nowait, u32 portid) + struct rtmsg *rtm, u32 portid) { struct mfc_cache *cache; struct mr_table *mrt; @@ -2158,11 +2205,6 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, struct net_device *dev; int vif = -1; - if (nowait) { - rcu_read_unlock(); - return -EAGAIN; - } - dev = skb->dev; read_lock(&mrt_lock); if (dev) @@ -2296,34 +2338,30 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) struct mr_table *mrt; struct mfc_cache *mfc; unsigned int t = 0, s_t; - unsigned int h = 0, s_h; unsigned int e = 0, s_e; s_t = cb->args[0]; - s_h = cb->args[1]; - s_e = cb->args[2]; + s_e = cb->args[1]; rcu_read_lock(); ipmr_for_each_table(mrt, net) { if (t < s_t) goto next_table; - if (t > s_t) - s_h = 0; - for (h = s_h; h < MFC_LINES; h++) { - list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) { - if (e < s_e) - goto next_entry; - if (ipmr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE, - NLM_F_MULTI) < 0) - goto done; + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) { + if (e < s_e) + goto next_entry; + if (ipmr_fill_mroute(mrt, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) + goto done; next_entry: - e++; - } - e = s_e = 0; + e++; } + e = 0; + s_e = 0; + spin_lock_bh(&mfc_unres_lock); list_for_each_entry(mfc, &mrt->mfc_unres_queue, list) { if (e < s_e) @@ -2340,16 +2378,15 @@ next_entry2: e++; } spin_unlock_bh(&mfc_unres_lock); - e = s_e = 0; - s_h = 0; + e = 0; + s_e = 0; next_table: t++; } done: rcu_read_unlock(); - cb->args[2] = e; - cb->args[1] = h; + cb->args[1] = e; cb->args[0] = t; return skb->len; @@ -2593,10 +2630,8 @@ struct ipmr_mfc_iter { struct seq_net_private p; struct mr_table *mrt; struct list_head *cache; - int ct; }; - static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, struct ipmr_mfc_iter *it, loff_t pos) { @@ -2604,12 +2639,10 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, struct mfc_cache *mfc; rcu_read_lock(); - for (it->ct = 0; it->ct < MFC_LINES; it->ct++) { - it->cache = &mrt->mfc_cache_array[it->ct]; - list_for_each_entry_rcu(mfc, it->cache, list) - if (pos-- == 0) - return mfc; - } + it->cache = &mrt->mfc_cache_list; + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) + if (pos-- == 0) + return mfc; rcu_read_unlock(); spin_lock_bh(&mfc_unres_lock); @@ -2636,17 +2669,16 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) it->mrt = mrt; it->cache = NULL; - it->ct = 0; return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) : SEQ_START_TOKEN; } static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct mfc_cache *mfc = v; struct ipmr_mfc_iter *it = seq->private; struct net *net = seq_file_net(seq); struct mr_table *mrt = it->mrt; + struct mfc_cache *mfc = v; ++*pos; @@ -2659,19 +2691,9 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (it->cache == &mrt->mfc_unres_queue) goto end_of_list; - BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]); - - while (++it->ct < MFC_LINES) { - it->cache = &mrt->mfc_cache_array[it->ct]; - if (list_empty(it->cache)) - continue; - return list_first_entry(it->cache, struct mfc_cache, list); - } - /* exhausted cache_array, show unresolved */ rcu_read_unlock(); it->cache = &mrt->mfc_unres_queue; - it->ct = 0; spin_lock_bh(&mfc_unres_lock); if (!list_empty(it->cache)) @@ -2691,7 +2713,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) if (it->cache == &mrt->mfc_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == &mrt->mfc_cache_array[it->ct]) + else if (it->cache == &mrt->mfc_cache_list) rcu_read_unlock(); } |