From 6a31d2a97c04ffe9b161ec0177a2296366ff9249 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Oct 2010 20:00:18 +0000 Subject: fib: cleanups Code style cleanups before upcoming functional changes. C99 initializer for fib_props array. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_semantics.c | 257 +++++++++++++++++++++++++---------------------- 1 file changed, 135 insertions(+), 122 deletions(-) (limited to 'net/ipv4/fib_semantics.c') diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 20f09c5b31e8..ba52f399a898 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -60,21 +60,30 @@ static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE]; static DEFINE_SPINLOCK(fib_multipath_lock); -#define for_nexthops(fi) { int nhsel; const struct fib_nh * nh; \ -for (nhsel=0, nh = (fi)->fib_nh; nhsel < (fi)->fib_nhs; nh++, nhsel++) - -#define change_nexthops(fi) { int nhsel; struct fib_nh *nexthop_nh; \ -for (nhsel=0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); nhsel < (fi)->fib_nhs; nexthop_nh++, nhsel++) +#define for_nexthops(fi) { \ + int nhsel; const struct fib_nh *nh; \ + for (nhsel = 0, nh = (fi)->fib_nh; \ + nhsel < (fi)->fib_nhs; \ + nh++, nhsel++) + +#define change_nexthops(fi) { \ + int nhsel; struct fib_nh *nexthop_nh; \ + for (nhsel = 0, nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ + nhsel < (fi)->fib_nhs; \ + nexthop_nh++, nhsel++) #else /* CONFIG_IP_ROUTE_MULTIPATH */ /* Hope, that gcc will optimize it to get rid of dummy loop */ -#define for_nexthops(fi) { int nhsel = 0; const struct fib_nh * nh = (fi)->fib_nh; \ -for (nhsel=0; nhsel < 1; nhsel++) +#define for_nexthops(fi) { \ + int nhsel; const struct fib_nh *nh = (fi)->fib_nh; \ + for (nhsel = 0; nhsel < 1; nhsel++) -#define change_nexthops(fi) { int nhsel = 0; struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ -for (nhsel=0; nhsel < 1; nhsel++) +#define change_nexthops(fi) { \ + int nhsel; \ + struct fib_nh *nexthop_nh = (struct fib_nh *)((fi)->fib_nh); \ + for (nhsel = 0; nhsel < 1; nhsel++) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -86,54 +95,54 @@ static const struct int error; u8 scope; } fib_props[RTN_MAX + 1] = { - { + [RTN_UNSPEC] = { .error = 0, .scope = RT_SCOPE_NOWHERE, - }, /* RTN_UNSPEC */ - { + }, + [RTN_UNICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE, - }, /* RTN_UNICAST */ - { + }, + [RTN_LOCAL] = { .error = 0, .scope = RT_SCOPE_HOST, - }, /* RTN_LOCAL */ - { + }, + [RTN_BROADCAST] = { .error = 0, .scope = RT_SCOPE_LINK, - }, /* RTN_BROADCAST */ - { + }, + [RTN_ANYCAST] = { .error = 0, .scope = RT_SCOPE_LINK, - }, /* RTN_ANYCAST */ - { + }, + [RTN_MULTICAST] = { .error = 0, .scope = RT_SCOPE_UNIVERSE, - }, /* RTN_MULTICAST */ - { + }, + [RTN_BLACKHOLE] = { .error = -EINVAL, .scope = RT_SCOPE_UNIVERSE, - }, /* RTN_BLACKHOLE */ - { + }, + [RTN_UNREACHABLE] = { .error = -EHOSTUNREACH, .scope = RT_SCOPE_UNIVERSE, - }, /* RTN_UNREACHABLE */ - { + }, + [RTN_PROHIBIT] = { .error = -EACCES, .scope = RT_SCOPE_UNIVERSE, - }, /* RTN_PROHIBIT */ - { + }, + [RTN_THROW] = { .error = -EAGAIN, .scope = RT_SCOPE_UNIVERSE, - }, /* RTN_THROW */ - { + }, + [RTN_NAT] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE, - }, /* RTN_NAT */ - { + }, + [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE, - }, /* RTN_XRESOLVE */ + }, }; @@ -142,7 +151,7 @@ static const struct void free_fib_info(struct fib_info *fi) { if (fi->fib_dead == 0) { - printk(KERN_WARNING "Freeing alive fib_info %p\n", fi); + pr_warning("Freeing alive fib_info %p\n", fi); return; } change_nexthops(fi) { @@ -173,7 +182,7 @@ void fib_release_info(struct fib_info *fi) spin_unlock_bh(&fib_info_lock); } -static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) +static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) { const struct fib_nh *onh = ofi->fib_nh; @@ -187,7 +196,7 @@ static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info * #ifdef CONFIG_NET_CLS_ROUTE nh->nh_tclassid != onh->nh_tclassid || #endif - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) + ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD)) return -1; onh++; } endfor_nexthops(fi); @@ -238,7 +247,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) nfi->fib_priority == fi->fib_priority && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 && - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && + ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) return fi; } @@ -247,9 +256,8 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) } /* Check, that the gateway is already configured. - Used only by redirect accept routine. + * Used only by redirect accept routine. */ - int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; @@ -264,7 +272,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) hlist_for_each_entry(nh, node, head, nh_hash) { if (nh->nh_dev == dev && nh->nh_gw == gw && - !(nh->nh_flags&RTNH_F_DEAD)) { + !(nh->nh_flags & RTNH_F_DEAD)) { spin_unlock(&fib_info_lock); return 0; } @@ -362,10 +370,10 @@ int fib_detect_death(struct fib_info *fi, int order, } if (state == NUD_REACHABLE) return 0; - if ((state&NUD_VALID) && order != dflt) + if ((state & NUD_VALID) && order != dflt) return 0; - if ((state&NUD_VALID) || - (*last_idx<0 && order > dflt)) { + if ((state & NUD_VALID) || + (*last_idx < 0 && order > dflt)) { *last_resort = fi; *last_idx = order; } @@ -476,69 +484,69 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) /* - Picture - ------- - - Semantics of nexthop is very messy by historical reasons. - We have to take into account, that: - a) gateway can be actually local interface address, - so that gatewayed route is direct. - b) gateway must be on-link address, possibly - described not by an ifaddr, but also by a direct route. - c) If both gateway and interface are specified, they should not - contradict. - d) If we use tunnel routes, gateway could be not on-link. - - Attempt to reconcile all of these (alas, self-contradictory) conditions - results in pretty ugly and hairy code with obscure logic. - - I chose to generalized it instead, so that the size - of code does not increase practically, but it becomes - much more general. - Every prefix is assigned a "scope" value: "host" is local address, - "link" is direct route, - [ ... "site" ... "interior" ... ] - and "universe" is true gateway route with global meaning. - - Every prefix refers to a set of "nexthop"s (gw, oif), - where gw must have narrower scope. This recursion stops - when gw has LOCAL scope or if "nexthop" is declared ONLINK, - which means that gw is forced to be on link. - - Code is still hairy, but now it is apparently logically - consistent and very flexible. F.e. as by-product it allows - to co-exists in peace independent exterior and interior - routing processes. - - Normally it looks as following. - - {universe prefix} -> (gw, oif) [scope link] - | - |-> {link prefix} -> (gw, oif) [scope local] - | - |-> {local prefix} (terminal node) + * Picture + * ------- + * + * Semantics of nexthop is very messy by historical reasons. + * We have to take into account, that: + * a) gateway can be actually local interface address, + * so that gatewayed route is direct. + * b) gateway must be on-link address, possibly + * described not by an ifaddr, but also by a direct route. + * c) If both gateway and interface are specified, they should not + * contradict. + * d) If we use tunnel routes, gateway could be not on-link. + * + * Attempt to reconcile all of these (alas, self-contradictory) conditions + * results in pretty ugly and hairy code with obscure logic. + * + * I chose to generalized it instead, so that the size + * of code does not increase practically, but it becomes + * much more general. + * Every prefix is assigned a "scope" value: "host" is local address, + * "link" is direct route, + * [ ... "site" ... "interior" ... ] + * and "universe" is true gateway route with global meaning. + * + * Every prefix refers to a set of "nexthop"s (gw, oif), + * where gw must have narrower scope. This recursion stops + * when gw has LOCAL scope or if "nexthop" is declared ONLINK, + * which means that gw is forced to be on link. + * + * Code is still hairy, but now it is apparently logically + * consistent and very flexible. F.e. as by-product it allows + * to co-exists in peace independent exterior and interior + * routing processes. + * + * Normally it looks as following. + * + * {universe prefix} -> (gw, oif) [scope link] + * | + * |-> {link prefix} -> (gw, oif) [scope local] + * | + * |-> {local prefix} (terminal node) */ - static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, struct fib_nh *nh) { int err; struct net *net; + struct net_device *dev; net = cfg->fc_nlinfo.nl_net; if (nh->nh_gw) { struct fib_result res; - if (nh->nh_flags&RTNH_F_ONLINK) { - struct net_device *dev; + if (nh->nh_flags & RTNH_F_ONLINK) { if (cfg->fc_scope >= RT_SCOPE_LINK) return -EINVAL; if (inet_addr_type(net, nh->nh_gw) != RTN_UNICAST) return -EINVAL; - if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) + dev = __dev_get_by_index(net, nh->nh_oif); + if (!dev) return -ENODEV; - if (!(dev->flags&IFF_UP)) + if (!(dev->flags & IFF_UP)) return -ENETDOWN; nh->nh_dev = dev; dev_hold(dev); @@ -559,7 +567,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, /* It is not necessary, but requires a bit of thinking */ if (fl.fl4_scope < RT_SCOPE_LINK) fl.fl4_scope = RT_SCOPE_LINK; - if ((err = fib_lookup(net, &fl, &res)) != 0) + err = fib_lookup(net, &fl, &res); + if (err) return err; } err = -EINVAL; @@ -567,11 +576,12 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, goto out; nh->nh_scope = res.scope; nh->nh_oif = FIB_RES_OIF(res); - if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) + nh->nh_dev = dev = FIB_RES_DEV(res); + if (!dev) goto out; - dev_hold(nh->nh_dev); + dev_hold(dev); err = -ENETDOWN; - if (!(nh->nh_dev->flags & IFF_UP)) + if (!(dev->flags & IFF_UP)) goto out; err = 0; out: @@ -580,13 +590,13 @@ out: } else { struct in_device *in_dev; - if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK)) + if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) return -EINVAL; in_dev = inetdev_by_index(net, nh->nh_oif); if (in_dev == NULL) return -ENODEV; - if (!(in_dev->dev->flags&IFF_UP)) { + if (!(in_dev->dev->flags & IFF_UP)) { in_dev_put(in_dev); return -ENETDOWN; } @@ -602,7 +612,9 @@ static inline unsigned int fib_laddr_hashfn(__be32 val) { unsigned int mask = (fib_hash_size - 1); - return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; + return ((__force u32)val ^ + ((__force u32)val >> 7) ^ + ((__force u32)val >> 14)) & mask; } static struct hlist_head *fib_hash_alloc(int bytes) @@ -611,7 +623,8 @@ static struct hlist_head *fib_hash_alloc(int bytes) return kzalloc(bytes, GFP_KERNEL); else return (struct hlist_head *) - __get_free_pages(GFP_KERNEL | __GFP_ZERO, get_order(bytes)); + __get_free_pages(GFP_KERNEL | __GFP_ZERO, + get_order(bytes)); } static void fib_hash_free(struct hlist_head *hash, int bytes) @@ -806,7 +819,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; } else { change_nexthops(fi) { - if ((err = fib_check_nh(cfg, fi, nexthop_nh)) != 0) + err = fib_check_nh(cfg, fi, nexthop_nh); + if (err != 0) goto failure; } endfor_nexthops(fi) } @@ -819,7 +833,8 @@ struct fib_info *fib_create_info(struct fib_config *cfg) } link_it: - if ((ofi = fib_find_info(fi)) != NULL) { + ofi = fib_find_info(fi); + if (ofi) { fi->fib_dead = 1; free_fib_info(fi); ofi->fib_treeref++; @@ -895,7 +910,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp, case RTN_ANYCAST: case RTN_MULTICAST: for_nexthops(fi) { - if (nh->nh_flags&RTNH_F_DEAD) + if (nh->nh_flags & RTNH_F_DEAD) continue; if (!flp->oif || flp->oif == nh->nh_oif) break; @@ -906,16 +921,15 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp, goto out_fill_res; } #else - if (nhsel < 1) { + if (nhsel < 1) goto out_fill_res; - } #endif endfor_nexthops(fi); continue; default: - printk(KERN_WARNING "fib_semantic_match bad type %#x\n", - fa->fa_type); + pr_warning("fib_semantic_match bad type %#x\n", + fa->fa_type); return -EINVAL; } } @@ -1028,10 +1042,10 @@ nla_put_failure: } /* - Update FIB if: - - local address disappeared -> we must delete all the entries - referring to it. - - device went down -> we must shutdown all nexthops going via it. + * Update FIB if: + * - local address disappeared -> we must delete all the entries + * referring to it. + * - device went down -> we must shutdown all nexthops going via it. */ int fib_sync_down_addr(struct net *net, __be32 local) { @@ -1078,7 +1092,7 @@ int fib_sync_down_dev(struct net_device *dev, int force) prev_fi = fi; dead = 0; change_nexthops(fi) { - if (nexthop_nh->nh_flags&RTNH_F_DEAD) + if (nexthop_nh->nh_flags & RTNH_F_DEAD) dead++; else if (nexthop_nh->nh_dev == dev && nexthop_nh->nh_scope != scope) { @@ -1110,10 +1124,9 @@ int fib_sync_down_dev(struct net_device *dev, int force) #ifdef CONFIG_IP_ROUTE_MULTIPATH /* - Dead device goes up. We wake up dead nexthops. - It takes sense only on multipath routes. + * Dead device goes up. We wake up dead nexthops. + * It takes sense only on multipath routes. */ - int fib_sync_up(struct net_device *dev) { struct fib_info *prev_fi; @@ -1123,7 +1136,7 @@ int fib_sync_up(struct net_device *dev) struct fib_nh *nh; int ret; - if (!(dev->flags&IFF_UP)) + if (!(dev->flags & IFF_UP)) return 0; prev_fi = NULL; @@ -1142,12 +1155,12 @@ int fib_sync_up(struct net_device *dev) prev_fi = fi; alive = 0; change_nexthops(fi) { - if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { + if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { alive++; continue; } if (nexthop_nh->nh_dev == NULL || - !(nexthop_nh->nh_dev->flags&IFF_UP)) + !(nexthop_nh->nh_dev->flags & IFF_UP)) continue; if (nexthop_nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) @@ -1169,10 +1182,9 @@ int fib_sync_up(struct net_device *dev) } /* - The algorithm is suboptimal, but it provides really - fair weighted route distribution. + * The algorithm is suboptimal, but it provides really + * fair weighted route distribution. */ - void fib_select_multipath(const struct flowi *flp, struct fib_result *res) { struct fib_info *fi = res->fi; @@ -1182,7 +1194,7 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res) if (fi->fib_power <= 0) { int power = 0; change_nexthops(fi) { - if (!(nexthop_nh->nh_flags&RTNH_F_DEAD)) { + if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) { power += nexthop_nh->nh_weight; nexthop_nh->nh_power = nexthop_nh->nh_weight; } @@ -1198,15 +1210,16 @@ void fib_select_multipath(const struct flowi *flp, struct fib_result *res) /* w should be random number [0..fi->fib_power-1], - it is pretty bad approximation. + * it is pretty bad approximation. */ w = jiffies % fi->fib_power; change_nexthops(fi) { - if (!(nexthop_nh->nh_flags&RTNH_F_DEAD) && + if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) && nexthop_nh->nh_power) { - if ((w -= nexthop_nh->nh_power) <= 0) { + w -= nexthop_nh->nh_power; + if (w <= 0) { nexthop_nh->nh_power--; fi->fib_power--; res->nh_sel = nhsel; -- cgit v1.2.3 From ebc0ffae5dfb4447e0a431ffe7fe1d467c48bbb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Oct 2010 10:41:36 +0000 Subject: fib: RCU conversion of fib_lookup() fib_lookup() converted to be called in RCU protected context, no reference taken and released on a contended cache line (fib_clntref) fib_table_lookup() and fib_semantic_match() get an additional parameter. struct fib_info gets an rcu_head field, and is freed after an rcu grace period. Stress test : (Sending 160.000.000 UDP frames on same neighbour, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_HASH) (about same results for FIB_TRIE) Before patch : real 1m31.199s user 0m13.761s sys 23m24.780s After patch: real 1m5.375s user 0m14.997s sys 15m50.115s Before patch Profile : 13044.00 15.4% __ip_route_output_key vmlinux 8438.00 10.0% dst_destroy vmlinux 5983.00 7.1% fib_semantic_match vmlinux 5410.00 6.4% fib_rules_lookup vmlinux 4803.00 5.7% neigh_lookup vmlinux 4420.00 5.2% _raw_spin_lock vmlinux 3883.00 4.6% rt_set_nexthop vmlinux 3261.00 3.9% _raw_read_lock vmlinux 2794.00 3.3% fib_table_lookup vmlinux 2374.00 2.8% neigh_resolve_output vmlinux 2153.00 2.5% dst_alloc vmlinux 1502.00 1.8% _raw_read_lock_bh vmlinux 1484.00 1.8% kmem_cache_alloc vmlinux 1407.00 1.7% eth_header vmlinux 1406.00 1.7% ipv4_dst_destroy vmlinux 1298.00 1.5% __copy_from_user_ll vmlinux 1174.00 1.4% dev_queue_xmit vmlinux 1000.00 1.2% ip_output vmlinux After patch Profile : 13712.00 15.8% dst_destroy vmlinux 8548.00 9.9% __ip_route_output_key vmlinux 7017.00 8.1% neigh_lookup vmlinux 4554.00 5.3% fib_semantic_match vmlinux 4067.00 4.7% _raw_read_lock vmlinux 3491.00 4.0% dst_alloc vmlinux 3186.00 3.7% neigh_resolve_output vmlinux 3103.00 3.6% fib_table_lookup vmlinux 2098.00 2.4% _raw_read_lock_bh vmlinux 2081.00 2.4% kmem_cache_alloc vmlinux 2013.00 2.3% _raw_spin_lock vmlinux 1763.00 2.0% __copy_from_user_ll vmlinux 1763.00 2.0% ip_output vmlinux 1761.00 2.0% ipv4_dst_destroy vmlinux 1631.00 1.9% eth_header vmlinux 1440.00 1.7% _raw_read_unlock_bh vmlinux Reference results, if IP route cache is enabled : real 0m29.718s user 0m10.845s sys 7m37.341s 25213.00 29.5% __ip_route_output_key vmlinux 9011.00 10.5% dst_release vmlinux 4817.00 5.6% ip_push_pending_frames vmlinux 4232.00 5.0% ip_finish_output vmlinux 3940.00 4.6% udp_sendmsg vmlinux 3730.00 4.4% __copy_from_user_ll vmlinux 3716.00 4.4% ip_route_output_flow vmlinux 2451.00 2.9% __xfrm_lookup vmlinux 2221.00 2.6% ip_append_data vmlinux 1718.00 2.0% _raw_spin_lock_bh vmlinux 1655.00 1.9% __alloc_skb vmlinux 1572.00 1.8% sock_wfree vmlinux 1345.00 1.6% kfree vmlinux Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/fib_rules.h | 2 ++ include/net/ip_fib.h | 17 ++++---------- net/core/fib_rules.c | 3 ++- net/ipv4/fib_frontend.c | 27 +++++++++++----------- net/ipv4/fib_hash.c | 5 ++-- net/ipv4/fib_lookup.h | 2 +- net/ipv4/fib_rules.c | 3 ++- net/ipv4/fib_semantics.c | 21 +++++++++++++---- net/ipv4/fib_trie.c | 10 ++++---- net/ipv4/route.c | 59 ++++++++++++++++++++---------------------------- 10 files changed, 72 insertions(+), 77 deletions(-) (limited to 'net/ipv4/fib_semantics.c') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index ac2fd002812e..106f3097d384 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -31,6 +31,8 @@ struct fib_lookup_arg { void *lookup_ptr; void *result; struct fib_rule *rule; + int flags; +#define FIB_LOOKUP_NOREF 1 }; struct fib_rules_ops { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c93f94edc610..ba3666d31766 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -86,6 +86,7 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_power; #endif + struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev }; @@ -148,7 +149,7 @@ struct fib_table { }; extern int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, - struct fib_result *res); + struct fib_result *res, int fib_flags); extern int fib_table_insert(struct fib_table *, struct fib_config *); extern int fib_table_delete(struct fib_table *, struct fib_config *); extern int fib_table_dump(struct fib_table *table, struct sk_buff *skb, @@ -185,11 +186,11 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp, struct fib_table *table; table = fib_get_table(net, RT_TABLE_LOCAL); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; table = fib_get_table(net, RT_TABLE_MAIN); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; return -ENETUNREACH; } @@ -254,16 +255,6 @@ static inline void fib_info_put(struct fib_info *fi) free_fib_info(fi); } -static inline void fib_res_put(struct fib_result *res) -{ - if (res->fi) - fib_info_put(res->fi); -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (res->r) - fib_rule_put(res->r); -#endif -} - #ifdef CONFIG_PROC_FS extern int __net_init fib_proc_init(struct net *net); extern void __net_exit fib_proc_exit(struct net *net); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index cfb7d25c172d..21698f8c49ee 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -225,7 +225,8 @@ jumped: err = ops->action(rule, fl, flags, arg); if (err != -EAGAIN) { - if (likely(atomic_inc_not_zero(&rule->refcnt))) { + if ((arg->flags & FIB_LOOKUP_NOREF) || + likely(atomic_inc_not_zero(&rule->refcnt))) { arg->rule = rule; goto out; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b05c23b05a9f..919f2ad19b49 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -168,8 +168,11 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) struct fib_result res = { 0 }; struct net_device *dev = NULL; - if (fib_lookup(net, &fl, &res)) + rcu_read_lock(); + if (fib_lookup(net, &fl, &res)) { + rcu_read_unlock(); return NULL; + } if (res.type != RTN_LOCAL) goto out; dev = FIB_RES_DEV(res); @@ -177,7 +180,7 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) if (dev && devref) dev_hold(dev); out: - fib_res_put(&res); + rcu_read_unlock(); return dev; } EXPORT_SYMBOL(__ip_dev_find); @@ -207,11 +210,12 @@ static inline unsigned __inet_dev_addr_type(struct net *net, local_table = fib_get_table(net, RT_TABLE_LOCAL); if (local_table) { ret = RTN_UNICAST; - if (!fib_table_lookup(local_table, &fl, &res)) { + rcu_read_lock(); + if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; - fib_res_put(&res); } + rcu_read_unlock(); } return ret; } @@ -235,6 +239,7 @@ EXPORT_SYMBOL(inet_dev_addr_type); * - figure out what "logical" interface this packet arrived * and calculate "specific destination" address. * - check, that packet arrived from expected physical interface. + * called with rcu_read_lock() */ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, __be32 *spec_dst, @@ -259,7 +264,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct net *net; no_addr = rpf = accept_local = 0; - rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; @@ -268,7 +272,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (mark && !IN_DEV_SRC_VMARK(in_dev)) fl.mark = 0; } - rcu_read_unlock(); if (in_dev == NULL) goto e_inval; @@ -278,7 +281,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, goto last_resort; if (res.type != RTN_UNICAST) { if (res.type != RTN_LOCAL || !accept_local) - goto e_inval_res; + goto e_inval; } *spec_dst = FIB_RES_PREFSRC(res); fib_combine_itag(itag, &res); @@ -299,10 +302,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, #endif if (dev_match) { ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; - fib_res_put(&res); return ret; } - fib_res_put(&res); if (no_addr) goto last_resort; if (rpf == 1) @@ -315,7 +316,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, *spec_dst = FIB_RES_PREFSRC(res); ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; } - fib_res_put(&res); } return ret; @@ -326,8 +326,6 @@ last_resort: *itag = 0; return 0; -e_inval_res: - fib_res_put(&res); e_inval: return -EINVAL; e_rpf: @@ -873,15 +871,16 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) local_bh_disable(); frn->tb_id = tb->tb_id; - frn->err = fib_table_lookup(tb, &fl, &res); + rcu_read_lock(); + frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); if (!frn->err) { frn->prefixlen = res.prefixlen; frn->nh_sel = res.nh_sel; frn->type = res.type; frn->scope = res.scope; - fib_res_put(&res); } + rcu_read_unlock(); local_bh_enable(); } } diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 4ed7e0dea1bc..83cca68e259c 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -244,7 +244,8 @@ fn_new_zone(struct fn_hash *table, int z) } int fib_table_lookup(struct fib_table *tb, - const struct flowi *flp, struct fib_result *res) + const struct flowi *flp, struct fib_result *res, + int fib_flags) { int err; struct fn_zone *fz; @@ -264,7 +265,7 @@ int fib_table_lookup(struct fib_table *tb, err = fib_semantic_match(&f->fn_alias, flp, res, - fz->fz_order); + fz->fz_order, fib_flags); if (err <= 0) goto out; } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 637b133973bd..b9c9a9f2aee5 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -22,7 +22,7 @@ struct fib_alias { /* Exported by fib_semantics.c */ extern int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, int prefixlen); + struct fib_result *res, int prefixlen, int fib_flags); extern void fib_release_info(struct fib_info *); extern struct fib_info *fib_create_info(struct fib_config *cfg); extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 32300521e32c..7981a24f5c7b 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -57,6 +57,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) { struct fib_lookup_arg arg = { .result = res, + .flags = FIB_LOOKUP_NOREF, }; int err; @@ -94,7 +95,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, if (!tbl) goto errout; - err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result); + err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags); if (err > 0) err = -EAGAIN; errout: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ba52f399a898..0f80dfc2f7fb 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -148,6 +148,13 @@ static const struct /* Release a nexthop info record */ +static void free_fib_info_rcu(struct rcu_head *head) +{ + struct fib_info *fi = container_of(head, struct fib_info, rcu); + + kfree(fi); +} + void free_fib_info(struct fib_info *fi) { if (fi->fib_dead == 0) { @@ -161,7 +168,7 @@ void free_fib_info(struct fib_info *fi) } endfor_nexthops(fi); fib_info_cnt--; release_net(fi->fib_net); - kfree(fi); + call_rcu(&fi->rcu, free_fib_info_rcu); } void fib_release_info(struct fib_info *fi) @@ -553,6 +560,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, nh->nh_scope = RT_SCOPE_LINK; return 0; } + rcu_read_lock(); { struct flowi fl = { .nl_u = { @@ -568,8 +576,10 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, if (fl.fl4_scope < RT_SCOPE_LINK) fl.fl4_scope = RT_SCOPE_LINK; err = fib_lookup(net, &fl, &res); - if (err) + if (err) { + rcu_read_unlock(); return err; + } } err = -EINVAL; if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) @@ -585,7 +595,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, goto out; err = 0; out: - fib_res_put(&res); + rcu_read_unlock(); return err; } else { struct in_device *in_dev; @@ -879,7 +889,7 @@ failure: /* Note! fib_semantic_match intentionally uses RCU list functions. */ int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, int prefixlen) + struct fib_result *res, int prefixlen, int fib_flags) { struct fib_alias *fa; int nh_sel = 0; @@ -943,7 +953,8 @@ out_fill_res: res->type = fa->fa_type; res->scope = fa->fa_scope; res->fi = fa->fa_info; - atomic_inc(&res->fi->fib_clntref); + if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&res->fi->fib_clntref); return 0; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a96e5ec211a0..271c89bdf049 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1342,7 +1342,7 @@ err: /* should be called with rcu_read_lock */ static int check_leaf(struct trie *t, struct leaf *l, t_key key, const struct flowi *flp, - struct fib_result *res) + struct fib_result *res, int fib_flags) { struct leaf_info *li; struct hlist_head *hhead = &l->list; @@ -1356,7 +1356,7 @@ static int check_leaf(struct trie *t, struct leaf *l, if (l->key != (key & ntohl(mask))) continue; - err = fib_semantic_match(&li->falh, flp, res, plen); + err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags); #ifdef CONFIG_IP_FIB_TRIE_STATS if (err <= 0) @@ -1372,7 +1372,7 @@ static int check_leaf(struct trie *t, struct leaf *l, } int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, - struct fib_result *res) + struct fib_result *res, int fib_flags) { struct trie *t = (struct trie *) tb->tb_data; int ret; @@ -1399,7 +1399,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, /* Just a leaf? */ if (IS_LEAF(n)) { - ret = check_leaf(t, (struct leaf *)n, key, flp, res); + ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); goto found; } @@ -1424,7 +1424,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, } if (IS_LEAF(n)) { - ret = check_leaf(t, (struct leaf *)n, key, flp, res); + ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); if (ret > 0) goto backtrace; goto found; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 04e0df82b88c..7864d0c48968 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1773,12 +1773,15 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt->fl.iif == 0) src = rt->rt_src; - else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { - src = FIB_RES_PREFSRC(res); - fib_res_put(&res); - } else - src = inet_select_addr(rt->dst.dev, rt->rt_gateway, + else { + rcu_read_lock(); + if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) + src = FIB_RES_PREFSRC(res); + else + src = inet_select_addr(rt->dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); + rcu_read_unlock(); + } memcpy(addr, &src, 4); } @@ -2081,6 +2084,7 @@ static int ip_mkroute_input(struct sk_buff *skb, * Such approach solves two big problems: * 1. Not simplex devices are handled properly. * 2. IP spoofing attempts are filtered with 100% of guarantee. + * called with rcu_read_lock() */ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, @@ -2102,7 +2106,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, unsigned hash; __be32 spec_dst; int err = -EINVAL; - int free_res = 0; struct net * net = dev_net(dev); /* IP on this device is disabled. */ @@ -2134,12 +2137,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. */ - if ((err = fib_lookup(net, &fl, &res)) != 0) { + err = fib_lookup(net, &fl, &res); + if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) goto e_hostunreach; goto no_route; } - free_res = 1; RT_CACHE_STAT_INC(in_slow_tot); @@ -2148,8 +2151,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { err = fib_validate_source(saddr, daddr, tos, - net->loopback_dev->ifindex, - dev, &spec_dst, &itag, skb->mark); + net->loopback_dev->ifindex, + dev, &spec_dst, &itag, skb->mark); if (err < 0) goto martian_source_keep_err; if (err) @@ -2164,9 +2167,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto martian_destination; err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); -done: - if (free_res) - fib_res_put(&res); out: return err; brd_input: @@ -2226,7 +2226,7 @@ local_input: rth->rt_type = res.type; hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); - goto done; + goto out; no_route: RT_CACHE_STAT_INC(in_no_route); @@ -2249,21 +2249,21 @@ martian_destination: e_hostunreach: err = -EHOSTUNREACH; - goto done; + goto out; e_inval: err = -EINVAL; - goto done; + goto out; e_nobufs: err = -ENOBUFS; - goto done; + goto out; martian_source: err = -EINVAL; martian_source_keep_err: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); - goto done; + goto out; } int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, @@ -2349,6 +2349,7 @@ skip_cache: } EXPORT_SYMBOL(ip_route_input_common); +/* called with rcu_read_lock() */ static int __mkroute_output(struct rtable **result, struct fib_result *res, const struct flowi *fl, @@ -2373,18 +2374,13 @@ static int __mkroute_output(struct rtable **result, if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rcu_read_lock(); in_dev = __in_dev_get_rcu(dev_out); - if (!in_dev) { - rcu_read_unlock(); + if (!in_dev) return -EINVAL; - } + if (res->type == RTN_BROADCAST) { flags |= RTCF_BROADCAST | RTCF_LOCAL; - if (res->fi) { - fib_info_put(res->fi); - res->fi = NULL; - } + res->fi = NULL; } else if (res->type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, @@ -2394,10 +2390,8 @@ static int __mkroute_output(struct rtable **result, * default one, but do not gateway in this case. * Yes, it is hack. */ - if (res->fi && res->prefixlen < 4) { - fib_info_put(res->fi); + if (res->fi && res->prefixlen < 4) res->fi = NULL; - } } @@ -2467,6 +2461,7 @@ static int __mkroute_output(struct rtable **result, return 0; } +/* called with rcu_read_lock() */ static int ip_mkroute_output(struct rtable **rp, struct fib_result *res, const struct flowi *fl, @@ -2509,7 +2504,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, struct fib_result res; unsigned int flags = 0; struct net_device *dev_out = NULL; - int free_res = 0; int err; @@ -2636,15 +2630,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, err = -ENETUNREACH; goto out; } - free_res = 1; if (res.type == RTN_LOCAL) { if (!fl.fl4_src) fl.fl4_src = fl.fl4_dst; dev_out = net->loopback_dev; fl.oif = dev_out->ifindex; - if (res.fi) - fib_info_put(res.fi); res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; @@ -2668,8 +2659,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, make_route: err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); - if (free_res) - fib_res_put(&res); out: return err; } -- cgit v1.2.3 From 8723e1b4ad9be4444423b4d41509ce859a629649 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Oct 2010 00:39:26 +0000 Subject: inet: RCU changes in inetdev_by_index() Convert inetdev_by_index() to not increment in_dev refcount. Callers hold RCU or RTNL, and should not decrement in_dev refcount. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 7 ++++--- net/ipv4/fib_semantics.c | 25 +++++++++++-------------- net/ipv4/igmp.c | 2 -- net/ipv4/ip_gre.c | 4 +--- 4 files changed, 16 insertions(+), 22 deletions(-) (limited to 'net/ipv4/fib_semantics.c') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index c2ff48fa18c7..dc94b0316b78 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -403,6 +403,9 @@ static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) return inet_insert_ifa(ifa); } +/* Caller must hold RCU or RTNL : + * We dont take a reference on found in_device + */ struct in_device *inetdev_by_index(struct net *net, int ifindex) { struct net_device *dev; @@ -411,7 +414,7 @@ struct in_device *inetdev_by_index(struct net *net, int ifindex) rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) - in_dev = in_dev_get(dev); + in_dev = rcu_dereference_rtnl(dev->ip_ptr); rcu_read_unlock(); return in_dev; } @@ -453,8 +456,6 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg goto errout; } - __in_dev_put(in_dev); - for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { if (tb[IFA_LOCAL] && diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 0f80dfc2f7fb..6734c9cab248 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -590,32 +590,29 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, if (!dev) goto out; dev_hold(dev); - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) - goto out; - err = 0; -out: - rcu_read_unlock(); - return err; + err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN; } else { struct in_device *in_dev; if (nh->nh_flags & (RTNH_F_PERVASIVE | RTNH_F_ONLINK)) return -EINVAL; + rcu_read_lock(); + err = -ENODEV; in_dev = inetdev_by_index(net, nh->nh_oif); if (in_dev == NULL) - return -ENODEV; - if (!(in_dev->dev->flags & IFF_UP)) { - in_dev_put(in_dev); - return -ENETDOWN; - } + goto out; + err = -ENETDOWN; + if (!(in_dev->dev->flags & IFF_UP)) + goto out; nh->nh_dev = in_dev->dev; dev_hold(nh->nh_dev); nh->nh_scope = RT_SCOPE_HOST; - in_dev_put(in_dev); + err = 0; } - return 0; +out: + rcu_read_unlock(); + return err; } static inline unsigned int fib_laddr_hashfn(__be32 val) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index a525328ec372..c8877c6c7216 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1429,8 +1429,6 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) if (imr->imr_ifindex) { idev = inetdev_by_index(net, imr->imr_ifindex); - if (idev) - __in_dev_put(idev); return idev; } if (imr->imr_address.s_addr) { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 9d421f4cf3ef..d0ffcbe369b7 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1245,10 +1245,8 @@ static int ipgre_close(struct net_device *dev) if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) { struct in_device *in_dev; in_dev = inetdev_by_index(dev_net(dev), t->mlink); - if (in_dev) { + if (in_dev) ip_mc_dec_group(in_dev, t->parms.iph.daddr); - in_dev_put(in_dev); - } } return 0; } -- cgit v1.2.3 From 9b0c290e78d667e6a483bde8c7cef7dd15f49017 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Oct 2010 22:03:38 +0000 Subject: fib: introduce fib_alias_accessed() helper Perf tools session at NFWS 2010 pointed out a false sharing on struct fib_alias that can be avoided pretty easily, if we set FA_S_ACCESSED bit only if needed (ie : not already set) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/fib_hash.c | 3 ++- net/ipv4/fib_lookup.h | 7 +++++++ net/ipv4/fib_semantics.c | 2 +- net/ipv4/fib_trie.c | 3 ++- 4 files changed, 12 insertions(+), 3 deletions(-) (limited to 'net/ipv4/fib_semantics.c') diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 4f1aafd3ba89..43e1c594ce8f 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -335,7 +335,8 @@ void fib_table_select_default(struct fib_table *tb, if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) continue; - fa->fa_state |= FA_S_ACCESSED; + + fib_alias_accessed(fa); if (fi == NULL) { if (next_fi != res->fi) diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 5072d8effd5d..a29edf2219c8 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -17,6 +17,13 @@ struct fib_alias { #define FA_S_ACCESSED 0x01 +/* Dont write on fa_state unless needed, to keep it shared on all cpus */ +static inline void fib_alias_accessed(struct fib_alias *fa) +{ + if (!(fa->fa_state & FA_S_ACCESSED)) + fa->fa_state |= FA_S_ACCESSED; +} + /* Exported by fib_semantics.c */ extern int fib_semantic_match(struct list_head *head, const struct flowi *flp, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 6734c9cab248..3e0da3ef6116 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -901,7 +901,7 @@ int fib_semantic_match(struct list_head *head, const struct flowi *flp, if (fa->fa_scope < flp->fl4_scope) continue; - fa->fa_state |= FA_S_ACCESSED; + fib_alias_accessed(fa); err = fib_props[fa->fa_type].error; if (err == 0) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 31494f335686..cd5e13aee7d5 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1838,7 +1838,8 @@ void fib_table_select_default(struct fib_table *tb, if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) continue; - fa->fa_state |= FA_S_ACCESSED; + + fib_alias_accessed(fa); if (fi == NULL) { if (next_fi != res->fi) -- cgit v1.2.3