From 0a24c43f54b296629774b47a0763123f3c18db9b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:31 +0000 Subject: ip6mr: do not get a device reference in pim6_rcv() pim6_rcv() is called under rcu_read_lock(), there is no need to use dev_hold()/dev_put() pair. IPv4 side was handled in commit 55747a0a73ea ("ipmr: __pim_rcv() is called under rcu_read_lock") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index d4aad41c9849..aa66c032ba97 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -554,7 +554,6 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) reg_dev = mrt->vif_table[reg_vif_num].dev; - dev_hold(reg_dev); read_unlock(&mrt_lock); if (!reg_dev) @@ -570,7 +569,6 @@ static int pim6_rcv(struct sk_buff *skb) netif_rx(skb); - dev_put(reg_dev); return 0; drop: kfree_skb(skb); -- cgit v1.2.3 From ebc3197963fc42841b183d0280bd3f9f85f13c30 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:32 +0000 Subject: ipmr: add rcu protection over (struct vif_device)->dev We will soon use RCU instead of rwlock in ipmr & ip6mr This preliminary patch adds proper rcu verbs to read/write (struct vif_device)->dev Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 11 +++++--- net/ipv4/ipmr.c | 65 ++++++++++++++++++++++++++------------------- net/ipv4/ipmr_base.c | 49 +++++++++++++++++++++------------- net/ipv6/ip6mr.c | 63 ++++++++++++++++++++++++------------------- 4 files changed, 111 insertions(+), 77 deletions(-) diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index e05ee9f001ff..10d1e4fb4e9f 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -26,7 +26,7 @@ * @remote: Remote address for tunnels */ struct vif_device { - struct net_device *dev; + struct net_device __rcu *dev; netdevice_tracker dev_tracker; unsigned long bytes_in, bytes_out; unsigned long pkt_in, pkt_out; @@ -52,6 +52,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, struct netlink_ext_ack *extack) { @@ -60,7 +61,7 @@ static inline int mr_call_vif_notifier(struct notifier_block *nb, .family = family, .extack = extack, }, - .dev = vif->dev, + .dev = vif_dev, .vif_index = vif_index, .vif_flags = vif->flags, .tb_id = tb_id, @@ -73,6 +74,7 @@ static inline int mr_call_vif_notifiers(struct net *net, unsigned short family, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, unsigned short vif_index, u32 tb_id, unsigned int *ipmr_seq) { @@ -80,7 +82,7 @@ static inline int mr_call_vif_notifiers(struct net *net, .info = { .family = family, }, - .dev = vif->dev, + .dev = vif_dev, .vif_index = vif_index, .vif_flags = vif->flags, .tb_id = tb_id, @@ -98,7 +100,8 @@ static inline int mr_call_vif_notifiers(struct net *net, #define MAXVIFS 32 #endif -#define VIF_EXISTS(_mrt, _idx) (!!((_mrt)->vif_table[_idx].dev)) +/* Note: This helper is deprecated. */ +#define VIF_EXISTS(_mrt, _idx) (!!rcu_access_pointer((_mrt)->vif_table[_idx].dev)) /* mfc_flags: * MFC_STATIC - the entry was added statically (not by a routing daemon) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8324e541d193..10371a9e78fc 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -79,6 +79,12 @@ struct ipmr_result { static DEFINE_RWLOCK(mrt_lock); +static struct net_device *vif_dev_read(const struct vif_device *vif) +{ + return rcu_dereference_check(vif->dev, + lockdep_is_held(&mrt_lock)); +} + /* Multicast router control variables */ /* Special spinlock for queue of unresolved entries */ @@ -586,7 +592,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, read_lock(&mrt_lock); if (mrt->mroute_reg_vif_num >= 0) - reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev; + reg_dev = vif_dev_read(&mrt->vif_table[mrt->mroute_reg_vif_num]); read_unlock(&mrt_lock); if (!reg_dev) @@ -614,10 +620,11 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) static int call_ipmr_vif_entry_notifiers(struct net *net, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, vifi_t vif_index, u32 tb_id) { return mr_call_vif_notifiers(net, RTNL_FAMILY_IPMR, event_type, - vif, vif_index, tb_id, + vif, vif_dev, vif_index, tb_id, &net->ipv4.ipmr_seq); } @@ -649,18 +656,14 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, v = &mrt->vif_table[vifi]; - if (VIF_EXISTS(mrt, vifi)) - call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi, - mrt->id); + dev = rtnl_dereference(v->dev); + if (!dev) + return -EADDRNOTAVAIL; write_lock_bh(&mrt_lock); - dev = v->dev; - v->dev = NULL; - - if (!dev) { - write_unlock_bh(&mrt_lock); - return -EADDRNOTAVAIL; - } + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev, + vifi, mrt->id); + RCU_INIT_POINTER(v->dev, NULL); if (vifi == mrt->mroute_reg_vif_num) mrt->mroute_reg_vif_num = -1; @@ -890,14 +893,15 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* And finish update writing critical data */ write_lock_bh(&mrt_lock); - v->dev = dev; + rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); if (v->flags & VIFF_REGISTER) mrt->mroute_reg_vif_num = vifi; if (vifi+1 > mrt->maxvif) mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); - call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id); + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, + vifi, mrt->id); return 0; } @@ -1726,7 +1730,7 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v ipmr_for_each_table(mrt, net) { v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { - if (v->dev == dev) + if (rcu_access_pointer(v->dev) == dev) vif_delete(mrt, ct, 1, NULL); } } @@ -1811,19 +1815,21 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, { const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &mrt->vif_table[vifi]; + struct net_device *vif_dev; struct net_device *dev; struct rtable *rt; struct flowi4 fl4; int encap = 0; - if (!vif->dev) + vif_dev = vif_dev_read(vif); + if (!vif_dev) goto out_free; if (vif->flags & VIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; - vif->dev->stats.tx_bytes += skb->len; - vif->dev->stats.tx_packets++; + vif_dev->stats.tx_bytes += skb->len; + vif_dev->stats.tx_packets++; ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); goto out_free; } @@ -1881,8 +1887,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, if (vif->flags & VIFF_TUNNEL) { ip_encap(net, skb, vif->local, vif->remote); /* FIXME: extra output firewall step used to be here. --RR */ - vif->dev->stats.tx_packets++; - vif->dev->stats.tx_bytes += skb->len; + vif_dev->stats.tx_packets++; + vif_dev->stats.tx_bytes += skb->len; } IPCB(skb)->flags |= IPSKB_FORWARDED; @@ -1911,7 +1917,7 @@ static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) int ct; for (ct = mrt->maxvif-1; ct >= 0; ct--) { - if (mrt->vif_table[ct].dev == dev) + if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break; } return ct; @@ -1944,7 +1950,7 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, } /* Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif_table[vif].dev != dev) { + if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { if (rt_is_output_route(skb_rtable(skb))) { /* It is our own packet, looped back. * Very complicated situation... @@ -2744,18 +2750,21 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) { + struct net_device *vif_dev; struct nlattr *vif_nest; struct vif_device *vif; + vif = &mrt->vif_table[vifid]; + vif_dev = vif_dev_read(vif); /* if the VIF doesn't exist just continue */ - if (!VIF_EXISTS(mrt, vifid)) + if (!vif_dev) return true; - vif = &mrt->vif_table[vifid]; vif_nest = nla_nest_start_noflag(skb, IPMRA_VIF); if (!vif_nest) return false; - if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif->dev->ifindex) || + + if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif_dev->ifindex) || nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, @@ -2919,9 +2928,11 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); } else { const struct vif_device *vif = v; - const char *name = vif->dev ? - vif->dev->name : "none"; + const struct net_device *vif_dev; + const char *name; + vif_dev = vif_dev_read(vif); + name = vif_dev ? vif_dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", vif - mrt->vif_table, diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index aa8738a91210..59f62b938472 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -13,7 +13,7 @@ void vif_device_init(struct vif_device *v, unsigned short flags, unsigned short get_iflink_mask) { - v->dev = NULL; + RCU_INIT_POINTER(v->dev, NULL); v->bytes_in = 0; v->bytes_out = 0; v->pkt_in = 0; @@ -208,6 +208,7 @@ EXPORT_SYMBOL(mr_mfc_seq_next); int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mr_mfc *c, struct rtmsg *rtm) { + struct net_device *vif_dev; struct rta_mfc_stats mfcs; struct nlattr *mp_attr; struct rtnexthop *nhp; @@ -220,10 +221,13 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, return -ENOENT; } - if (VIF_EXISTS(mrt, c->mfc_parent) && - nla_put_u32(skb, RTA_IIF, - mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) + rcu_read_lock(); + vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev); + if (vif_dev && nla_put_u32(skb, RTA_IIF, vif_dev->ifindex) < 0) { + rcu_read_unlock(); return -EMSGSIZE; + } + rcu_read_unlock(); if (c->mfc_flags & MFC_OFFLOAD) rtm->rtm_flags |= RTNH_F_OFFLOAD; @@ -232,23 +236,27 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (!mp_attr) return -EMSGSIZE; + rcu_read_lock(); for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { - struct vif_device *vif; + struct vif_device *vif = &mrt->vif_table[ct]; + + vif_dev = rcu_dereference(vif->dev); + if (vif_dev && c->mfc_un.res.ttls[ct] < 255) { nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); if (!nhp) { + rcu_read_unlock(); nla_nest_cancel(skb, mp_attr); return -EMSGSIZE; } nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - vif = &mrt->vif_table[ct]; - nhp->rtnh_ifindex = vif->dev->ifindex; + nhp->rtnh_ifindex = vif_dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } + rcu_read_unlock(); nla_nest_end(skb, mp_attr); @@ -275,13 +283,14 @@ static bool mr_mfc_uses_dev(const struct mr_table *mrt, int ct; for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { - if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { - const struct vif_device *vif; - - vif = &mrt->vif_table[ct]; - if (vif->dev == dev) - return true; - } + const struct net_device *vif_dev; + const struct vif_device *vif; + + vif = &mrt->vif_table[ct]; + vif_dev = rcu_access_pointer(vif->dev); + if (vif_dev && c->mfc_un.res.ttls[ct] < 255 && + vif_dev == dev) + return true; } return false; } @@ -402,18 +411,22 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, for (mrt = mr_iter(net, NULL); mrt; mrt = mr_iter(net, mrt)) { struct vif_device *v = &mrt->vif_table[0]; + struct net_device *vif_dev; struct mr_mfc *mfc; int vifi; /* Notifiy on table VIF entries */ read_lock(mrt_lock); for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { - if (!v->dev) + vif_dev = rcu_dereference_check(v->dev, + lockdep_is_held(mrt_lock)); + if (!vif_dev) continue; err = mr_call_vif_notifier(nb, family, - FIB_EVENT_VIF_ADD, - v, vifi, mrt->id, extack); + FIB_EVENT_VIF_ADD, v, + vif_dev, vifi, + mrt->id, extack); if (err) break; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index aa66c032ba97..44cb3d88bbd6 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -64,6 +64,12 @@ struct ip6mr_result { static DEFINE_RWLOCK(mrt_lock); +static struct net_device *vif_dev_read(const struct vif_device *vif) +{ + return rcu_dereference_check(vif->dev, + lockdep_is_held(&mrt_lock)); +} + /* Multicast router control variables */ /* Special spinlock for queue of unresolved entries */ @@ -430,7 +436,11 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) "Interface BytesIn PktsIn BytesOut PktsOut Flags\n"); } else { const struct vif_device *vif = v; - const char *name = vif->dev ? vif->dev->name : "none"; + const struct net_device *vif_dev; + const char *name; + + vif_dev = vif_dev_read(vif); + name = vif_dev ? vif_dev->name : "none"; seq_printf(seq, "%2td %-10s %8ld %7ld %8ld %7ld %05X\n", @@ -553,7 +563,7 @@ static int pim6_rcv(struct sk_buff *skb) read_lock(&mrt_lock); if (reg_vif_num >= 0) - reg_dev = mrt->vif_table[reg_vif_num].dev; + reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]); read_unlock(&mrt_lock); if (!reg_dev) @@ -668,10 +678,11 @@ failure: static int call_ip6mr_vif_entry_notifiers(struct net *net, enum fib_event_type event_type, struct vif_device *vif, + struct net_device *vif_dev, mifi_t vif_index, u32 tb_id) { return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type, - vif, vif_index, tb_id, + vif, vif_dev, vif_index, tb_id, &net->ipv6.ipmr_seq); } @@ -696,19 +707,15 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, v = &mrt->vif_table[vifi]; - if (VIF_EXISTS(mrt, vifi)) - call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), - FIB_EVENT_VIF_DEL, v, vifi, - mrt->id); + dev = rtnl_dereference(v->dev); + if (!dev) + return -EADDRNOTAVAIL; + call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), + FIB_EVENT_VIF_DEL, v, dev, + vifi, mrt->id); write_lock_bh(&mrt_lock); - dev = v->dev; - v->dev = NULL; - - if (!dev) { - write_unlock_bh(&mrt_lock); - return -EADDRNOTAVAIL; - } + RCU_INIT_POINTER(v->dev, NULL); #ifdef CONFIG_IPV6_PIMSM_V2 if (vifi == mrt->mroute_reg_vif_num) @@ -911,7 +918,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, /* And finish update writing critical data */ write_lock_bh(&mrt_lock); - v->dev = dev; + rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) @@ -921,7 +928,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, mrt->maxvif = vifi + 1; write_unlock_bh(&mrt_lock); call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, - v, vifi, mrt->id); + v, dev, vifi, mrt->id); return 0; } @@ -1241,7 +1248,7 @@ static int ip6mr_device_event(struct notifier_block *this, ip6mr_for_each_table(mrt, net) { v = &mrt->vif_table[0]; for (ct = 0; ct < mrt->maxvif; ct++, v++) { - if (v->dev == dev) + if (rcu_access_pointer(v->dev) == dev) mif6_delete(mrt, ct, 1, NULL); } } @@ -2019,21 +2026,22 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct static int ip6mr_forward2(struct net *net, struct mr_table *mrt, struct sk_buff *skb, int vifi) { - struct ipv6hdr *ipv6h; struct vif_device *vif = &mrt->vif_table[vifi]; - struct net_device *dev; + struct net_device *vif_dev; + struct ipv6hdr *ipv6h; struct dst_entry *dst; struct flowi6 fl6; - if (!vif->dev) + vif_dev = vif_dev_read(vif); + if (!vif_dev) goto out_free; #ifdef CONFIG_IPV6_PIMSM_V2 if (vif->flags & MIFF_REGISTER) { vif->pkt_out++; vif->bytes_out += skb->len; - vif->dev->stats.tx_bytes += skb->len; - vif->dev->stats.tx_packets++; + vif_dev->stats.tx_bytes += skb->len; + vif_dev->stats.tx_packets++; ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); goto out_free; } @@ -2066,14 +2074,13 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. */ - dev = vif->dev; - skb->dev = dev; + skb->dev = vif_dev; vif->pkt_out++; vif->bytes_out += skb->len; /* We are about to write */ /* XXX: extension headers? */ - if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev))) + if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(vif_dev))) goto out_free; ipv6h = ipv6_hdr(skb); @@ -2082,7 +2089,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, IP6CB(skb)->flags |= IP6SKB_FORWARDED; return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, - net, NULL, skb, skb->dev, dev, + net, NULL, skb, skb->dev, vif_dev, ip6mr_forward2_finish); out_free: @@ -2095,7 +2102,7 @@ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) int ct; for (ct = mrt->maxvif - 1; ct >= 0; ct--) { - if (mrt->vif_table[ct].dev == dev) + if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break; } return ct; @@ -2133,7 +2140,7 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (mrt->vif_table[vif].dev != dev) { + if (rcu_access_pointer(mrt->vif_table[vif].dev) != dev) { c->_c.mfc_un.res.wrong_if++; if (true_vifi >= 0 && mrt->mroute_do_assert && -- cgit v1.2.3 From 0b490b51d226b562a0aa6d287cfa25417f71fb79 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:33 +0000 Subject: ipmr: change igmpmsg_netlink_event() prototype igmpmsg_netlink_event() first argument can be marked const. igmpmsg_netlink_event() reads mrt->net and mrt->id, both being set once in mr_table_alloc(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 10371a9e78fc..6710324497ca 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -110,7 +110,7 @@ static int ipmr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); -static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); +static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); static void mroute_clean_tables(struct mr_table *mrt, int flags); static void ipmr_expire_process(struct timer_list *t); @@ -2410,7 +2410,7 @@ static size_t igmpmsg_netlink_msgsize(size_t payloadlen) return len; } -static void igmpmsg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) +static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) { struct net *net = read_pnet(&mrt->net); struct nlmsghdr *nlh; -- cgit v1.2.3 From 646679881a02a15a0915f4cd18dff6cecb8960fc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:34 +0000 Subject: ipmr: ipmr_cache_report() changes ipmr_cache_report() first argument can be marked const, and we change the caller convention about which lock needs to be held. Instead of read_lock(&mrt_lock), we can use rcu_read_lock(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 45 +++++++++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6710324497ca..8fe7a688cf41 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -106,7 +106,7 @@ static void ipmr_free_table(struct mr_table *mrt); static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc_cache *cache, int local); -static int ipmr_cache_report(struct mr_table *mrt, +static int ipmr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert); static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); @@ -507,11 +507,15 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) return err; } - read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT); - read_unlock(&mrt_lock); + rcu_read_lock(); + + /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ + ipmr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), + IGMPMSG_WHOLEPKT); + + rcu_read_unlock(); kfree_skb(skb); return NETDEV_TX_OK; } @@ -665,9 +669,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, vifi, mrt->id); RCU_INIT_POINTER(v->dev, NULL); - if (vifi == mrt->mroute_reg_vif_num) - mrt->mroute_reg_vif_num = -1; - + if (vifi == mrt->mroute_reg_vif_num) { + /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ + WRITE_ONCE(mrt->mroute_reg_vif_num, -1); + } if (vifi + 1 == mrt->maxvif) { int tmp; @@ -895,8 +900,10 @@ static int vif_add(struct net *net, struct mr_table *mrt, write_lock_bh(&mrt_lock); rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); - if (v->flags & VIFF_REGISTER) - mrt->mroute_reg_vif_num = vifi; + if (v->flags & VIFF_REGISTER) { + /* Pairs with READ_ONCE() in ipmr_cache_report() and reg_vif_xmit() */ + WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); + } if (vifi+1 > mrt->maxvif) mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); @@ -1005,9 +1012,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, /* Bounce a cache query up to mrouted and netlink. * - * Called under mrt_lock. + * Called under rcu_read_lock(). */ -static int ipmr_cache_report(struct mr_table *mrt, +static int ipmr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, vifi_t vifi, int assert) { const int ihl = ip_hdrlen(pkt); @@ -1042,8 +1049,11 @@ static int ipmr_cache_report(struct mr_table *mrt, msg->im_vif = vifi; msg->im_vif_hi = vifi >> 8; } else { - msg->im_vif = mrt->mroute_reg_vif_num; - msg->im_vif_hi = mrt->mroute_reg_vif_num >> 8; + /* Pairs with WRITE_ONCE() in vif_add() and vif_delete() */ + int vif_num = READ_ONCE(mrt->mroute_reg_vif_num); + + msg->im_vif = vif_num; + msg->im_vif_hi = vif_num >> 8; } ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + @@ -1068,10 +1078,8 @@ static int ipmr_cache_report(struct mr_table *mrt, skb->transport_header = skb->network_header; } - rcu_read_lock(); mroute_sk = rcu_dereference(mrt->mroute_sk); if (!mroute_sk) { - rcu_read_unlock(); kfree_skb(skb); return -EINVAL; } @@ -1080,7 +1088,7 @@ static int ipmr_cache_report(struct mr_table *mrt, /* Deliver to mrouted */ ret = sock_queue_rcv_skb(mroute_sk, skb); - rcu_read_unlock(); + if (ret < 0) { net_warn_ratelimited("mroute: pending queue full, dropping entries\n"); kfree_skb(skb); @@ -1090,6 +1098,7 @@ static int ipmr_cache_report(struct mr_table *mrt, } /* Queue a packet for resolution. It gets locked cache entry! */ +/* Called under rcu_read_lock() */ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb, struct net_device *dev) { @@ -1830,7 +1839,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, vif->bytes_out += skb->len; vif_dev->stats.tx_bytes += skb->len; vif_dev->stats.tx_packets++; + rcu_read_lock(); ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); + rcu_read_unlock(); goto out_free; } @@ -1980,10 +1991,12 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, c->_c.mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; + rcu_read_lock(); ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); if (mrt->mroute_do_wrvifwhole) ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRVIFWHOLE); + rcu_read_unlock(); } goto dont_forward; } -- cgit v1.2.3 From 121fefc669bf6f603bbda380c91d788405bc4f1f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:35 +0000 Subject: ipmr: do not acquire mrt_lock in __pim_rcv() rcu_read_lock() protection is more than enough. vif_dev_read() supports either mrt_lock or rcu_read_lock(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8fe7a688cf41..8a94f9a459cd 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -582,6 +582,7 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, { struct net_device *reg_dev = NULL; struct iphdr *encap; + int vif_num; encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); /* Check that: @@ -594,11 +595,10 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb, ntohs(encap->tot_len) + pimlen > skb->len) return 1; - read_lock(&mrt_lock); - if (mrt->mroute_reg_vif_num >= 0) - reg_dev = vif_dev_read(&mrt->vif_table[mrt->mroute_reg_vif_num]); - read_unlock(&mrt_lock); - + /* Pairs with WRITE_ONCE() in vif_add()/vid_delete() */ + vif_num = READ_ONCE(mrt->mroute_reg_vif_num); + if (vif_num >= 0) + reg_dev = vif_dev_read(&mrt->vif_table[vif_num]); if (!reg_dev) return 1; -- cgit v1.2.3 From 559260fd9d9a43afa4f8dd518969a1cc7dd40418 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:36 +0000 Subject: ipmr: do not acquire mrt_lock in ioctl(SIOCGETVIFCNT) rcu_read_lock() protection is good enough. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8a94f9a459cd..bc8b7504fde6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1611,20 +1611,20 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) if (vr.vifi >= mrt->maxvif) return -EINVAL; vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); - read_lock(&mrt_lock); + rcu_read_lock(); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { - vr.icount = vif->pkt_in; - vr.ocount = vif->pkt_out; - vr.ibytes = vif->bytes_in; - vr.obytes = vif->bytes_out; - read_unlock(&mrt_lock); + vr.icount = READ_ONCE(vif->pkt_in); + vr.ocount = READ_ONCE(vif->pkt_out); + vr.ibytes = READ_ONCE(vif->bytes_in); + vr.obytes = READ_ONCE(vif->bytes_out); + rcu_read_unlock(); if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT: if (copy_from_user(&sr, arg, sizeof(sr))) @@ -1686,20 +1686,20 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) if (vr.vifi >= mrt->maxvif) return -EINVAL; vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); - read_lock(&mrt_lock); + rcu_read_lock(); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { - vr.icount = vif->pkt_in; - vr.ocount = vif->pkt_out; - vr.ibytes = vif->bytes_in; - vr.obytes = vif->bytes_out; - read_unlock(&mrt_lock); + vr.icount = READ_ONCE(vif->pkt_in); + vr.ocount = READ_ONCE(vif->pkt_out); + vr.ibytes = READ_ONCE(vif->bytes_in); + vr.obytes = READ_ONCE(vif->bytes_out); + rcu_read_unlock(); if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT: if (copy_from_user(&sr, arg, sizeof(sr))) @@ -1835,8 +1835,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; if (vif->flags & VIFF_REGISTER) { - vif->pkt_out++; - vif->bytes_out += skb->len; + WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); + WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); vif_dev->stats.tx_bytes += skb->len; vif_dev->stats.tx_packets++; rcu_read_lock(); @@ -1885,8 +1885,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } - vif->pkt_out++; - vif->bytes_out += skb->len; + WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); + WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); @@ -2002,8 +2002,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, } forward: - mrt->vif_table[vif].pkt_in++; - mrt->vif_table[vif].bytes_in += skb->len; + WRITE_ONCE(mrt->vif_table[vif].pkt_in, + mrt->vif_table[vif].pkt_in + 1); + WRITE_ONCE(mrt->vif_table[vif].bytes_in, + mrt->vif_table[vif].bytes_in + skb->len); /* Forward the frame */ if (c->mfc_origin == htonl(INADDR_ANY) && -- cgit v1.2.3 From 9094db4b8004fca4899852684df2a32dab15c851 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:37 +0000 Subject: ipmr: do not acquire mrt_lock before calling ipmr_cache_unresolved() rcu_read_lock() protection is good enough. ipmr_cache_unresolved() uses a dedicated spinlock (mfc_unres_lock) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index bc8b7504fde6..6ea54bc3d9b6 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -680,7 +680,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, if (VIF_EXISTS(mrt, tmp)) break; } - mrt->maxvif = tmp+1; + WRITE_ONCE(mrt->maxvif, tmp + 1); } write_unlock_bh(&mrt_lock); @@ -905,7 +905,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); } if (vifi+1 > mrt->maxvif) - mrt->maxvif = vifi+1; + WRITE_ONCE(mrt->maxvif, vifi + 1); write_unlock_bh(&mrt_lock); call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, vifi, mrt->id); @@ -1923,11 +1923,12 @@ out_free: kfree_skb(skb); } -static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev) +/* Called with mrt_lock or rcu_read_lock() */ +static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) { int ct; - - for (ct = mrt->maxvif-1; ct >= 0; ct--) { + /* Pairs with WRITE_ONCE() in vif_delete()/vif_add() */ + for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break; } @@ -2161,15 +2162,9 @@ int ip_mr_input(struct sk_buff *skb) skb = skb2; } - read_lock(&mrt_lock); vif = ipmr_find_vif(mrt, dev); - if (vif >= 0) { - int err2 = ipmr_cache_unresolved(mrt, vif, skb, dev); - read_unlock(&mrt_lock); - - return err2; - } - read_unlock(&mrt_lock); + if (vif >= 0) + return ipmr_cache_unresolved(mrt, vif, skb, dev); kfree_skb(skb); return -ENODEV; } @@ -2273,18 +2268,15 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, int vif = -1; dev = skb->dev; - read_lock(&mrt_lock); if (dev) vif = ipmr_find_vif(mrt, dev); if (vif < 0) { - read_unlock(&mrt_lock); rcu_read_unlock(); return -ENODEV; } skb2 = skb_realloc_headroom(skb, sizeof(struct iphdr)); if (!skb2) { - read_unlock(&mrt_lock); rcu_read_unlock(); return -ENOMEM; } @@ -2298,7 +2290,6 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, iph->daddr = daddr; iph->version = 0; err = ipmr_cache_unresolved(mrt, vif, skb2, dev); - read_unlock(&mrt_lock); rcu_read_unlock(); return err; } -- cgit v1.2.3 From 4eadb88244d17056179a47f613e30c49191072c5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:38 +0000 Subject: ipmr: do not acquire mrt_lock while calling ip_mr_forward() ip_mr_forward() uses standard RCU protection already. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 6ea54bc3d9b6..b0f2e6d79d62 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1817,7 +1817,7 @@ static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, } #endif -/* Processing handlers for ipmr_forward */ +/* Processing handlers for ipmr_forward, under rcu_read_lock() */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, int in_vifi, struct sk_buff *skb, int vifi) @@ -1839,9 +1839,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); vif_dev->stats.tx_bytes += skb->len; vif_dev->stats.tx_packets++; - rcu_read_lock(); ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT); - rcu_read_unlock(); goto out_free; } @@ -1936,6 +1934,7 @@ static int ipmr_find_vif(const struct mr_table *mrt, struct net_device *dev) } /* "local" means that we should preserve one skb (for local delivery) */ +/* Called uner rcu_read_lock() */ static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc_cache *c, int local) @@ -1992,12 +1991,10 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, c->_c.mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; - rcu_read_lock(); ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); if (mrt->mroute_do_wrvifwhole) ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRVIFWHOLE); - rcu_read_unlock(); } goto dont_forward; } @@ -2169,9 +2166,7 @@ int ip_mr_input(struct sk_buff *skb) return -ENODEV; } - read_lock(&mrt_lock); ip_mr_forward(net, mrt, dev, skb, cache, local); - read_unlock(&mrt_lock); if (local) return ip_local_deliver(skb); -- cgit v1.2.3 From e4cd9868e8ec3691e6d94725c8b10edd1ec6eca2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:39 +0000 Subject: ipmr: do not acquire mrt_lock in ipmr_get_route() mr_fill_mroute() uses standard rcu_read_unlock(), no need to grab mrt_lock anymore. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index b0f2e6d79d62..69ccd3d7c655 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2289,9 +2289,7 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, return err; } - read_lock(&mrt_lock); err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); - read_unlock(&mrt_lock); rcu_read_unlock(); return err; } -- cgit v1.2.3 From 3493a5b730e5c0dfecb445e56a4ad36a36819ba9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:40 +0000 Subject: ip6mr: ip6mr_cache_report() changes ip6mr_cache_report() first argument can be marked const, and we change the caller convention about which lock needs to be held. Instead of read_lock(&mrt_lock), we can use rcu_read_lock(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 44cb3d88bbd6..a6d97952bf53 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -91,11 +91,11 @@ static void ip6mr_free_table(struct mr_table *mrt); static void ip6_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc6_cache *cache); -static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, +static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert); static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc, int cmd); -static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt); +static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb); static void mroute_clean_tables(struct mr_table *mrt, int flags); @@ -608,11 +608,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) goto tx_err; - read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT); - read_unlock(&mrt_lock); + rcu_read_lock(); + ip6mr_cache_report(mrt, skb, READ_ONCE(mrt->mroute_reg_vif_num), + MRT6MSG_WHOLEPKT); + rcu_read_unlock(); kfree_skb(skb); return NETDEV_TX_OK; @@ -718,8 +719,10 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, RCU_INIT_POINTER(v->dev, NULL); #ifdef CONFIG_IPV6_PIMSM_V2 - if (vifi == mrt->mroute_reg_vif_num) - mrt->mroute_reg_vif_num = -1; + if (vifi == mrt->mroute_reg_vif_num) { + /* Pairs with READ_ONCE() in ip6mr_cache_report() and reg_vif_xmit() */ + WRITE_ONCE(mrt->mroute_reg_vif_num, -1); + } #endif if (vifi + 1 == mrt->maxvif) { @@ -922,7 +925,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); #ifdef CONFIG_IPV6_PIMSM_V2 if (v->flags & MIFF_REGISTER) - mrt->mroute_reg_vif_num = vifi; + WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); #endif if (vifi + 1 > mrt->maxvif) mrt->maxvif = vifi + 1; @@ -1033,10 +1036,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, /* * Bounce a cache query up to pim6sd and netlink. * - * Called under mrt_lock. + * Called under rcu_read_lock() */ -static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, +static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt, mifi_t mifi, int assert) { struct sock *mroute6_sk; @@ -1077,7 +1080,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, if (assert == MRT6MSG_WRMIFWHOLE) msg->im6_mif = mifi; else - msg->im6_mif = mrt->mroute_reg_vif_num; + msg->im6_mif = READ_ONCE(mrt->mroute_reg_vif_num); msg->im6_pad = 0; msg->im6_src = ipv6_hdr(pkt)->saddr; msg->im6_dst = ipv6_hdr(pkt)->daddr; @@ -1112,10 +1115,8 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, skb->ip_summed = CHECKSUM_UNNECESSARY; } - rcu_read_lock(); mroute6_sk = rcu_dereference(mrt->mroute_sk); if (!mroute6_sk) { - rcu_read_unlock(); kfree_skb(skb); return -EINVAL; } @@ -1124,7 +1125,7 @@ static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt, /* Deliver to user space multicast routing algorithms */ ret = sock_queue_rcv_skb(mroute6_sk, skb); - rcu_read_unlock(); + if (ret < 0) { net_warn_ratelimited("mroute6: pending queue full, dropping entries\n"); kfree_skb(skb); @@ -2042,7 +2043,9 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, vif->bytes_out += skb->len; vif_dev->stats.tx_bytes += skb->len; vif_dev->stats.tx_packets++; + rcu_read_lock(); ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); + rcu_read_unlock(); goto out_free; } #endif @@ -2155,10 +2158,12 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, c->_c.mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; + rcu_read_lock(); ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); if (mrt->mroute_do_wrvifwhole) ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRMIFWHOLE); + rcu_read_unlock(); } goto dont_forward; } @@ -2465,7 +2470,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen) return len; } -static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt) +static void mrt6msg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt) { struct net *net = read_pnet(&mrt->net); struct nlmsghdr *nlh; -- cgit v1.2.3 From 6d08658736fc8b4baae778a7f51d2c7baa47eeff Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:41 +0000 Subject: ip6mr: do not acquire mrt_lock in pim6_rcv() rcu_read_lock() protection is more than enough. vif_dev_read() supports either mrt_lock or rcu_read_lock(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index a6d97952bf53..fa6720377e82 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -559,12 +559,11 @@ static int pim6_rcv(struct sk_buff *skb) if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0) goto drop; - reg_vif_num = mrt->mroute_reg_vif_num; - read_lock(&mrt_lock); + /* Pairs with WRITE_ONCE() in mif6_add()/mif6_delete() */ + reg_vif_num = READ_ONCE(mrt->mroute_reg_vif_num); if (reg_vif_num >= 0) reg_dev = vif_dev_read(&mrt->vif_table[reg_vif_num]); - read_unlock(&mrt_lock); if (!reg_dev) goto drop; -- cgit v1.2.3 From 638cf4a24a09d14c51415dd0964f978f97d54642 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:42 +0000 Subject: ip6mr: do not acquire mrt_lock in ioctl(SIOCGETMIFCNT_IN6) rcu_read_lock() protection is good enough. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index fa6720377e82..b4ad606e24bd 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1896,20 +1896,20 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) if (vr.mifi >= mrt->maxvif) return -EINVAL; vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); - read_lock(&mrt_lock); + rcu_read_lock(); vif = &mrt->vif_table[vr.mifi]; if (VIF_EXISTS(mrt, vr.mifi)) { - vr.icount = vif->pkt_in; - vr.ocount = vif->pkt_out; - vr.ibytes = vif->bytes_in; - vr.obytes = vif->bytes_out; - read_unlock(&mrt_lock); + vr.icount = READ_ONCE(vif->pkt_in); + vr.ocount = READ_ONCE(vif->pkt_out); + vr.ibytes = READ_ONCE(vif->bytes_in); + vr.obytes = READ_ONCE(vif->bytes_out); + rcu_read_unlock(); if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT_IN6: if (copy_from_user(&sr, arg, sizeof(sr))) @@ -1971,20 +1971,20 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) if (vr.mifi >= mrt->maxvif) return -EINVAL; vr.mifi = array_index_nospec(vr.mifi, mrt->maxvif); - read_lock(&mrt_lock); + rcu_read_lock(); vif = &mrt->vif_table[vr.mifi]; if (VIF_EXISTS(mrt, vr.mifi)) { - vr.icount = vif->pkt_in; - vr.ocount = vif->pkt_out; - vr.ibytes = vif->bytes_in; - vr.obytes = vif->bytes_out; - read_unlock(&mrt_lock); + vr.icount = READ_ONCE(vif->pkt_in); + vr.ocount = READ_ONCE(vif->pkt_out); + vr.ibytes = READ_ONCE(vif->bytes_in); + vr.obytes = READ_ONCE(vif->bytes_out); + rcu_read_unlock(); if (copy_to_user(arg, &vr, sizeof(vr))) return -EFAULT; return 0; } - read_unlock(&mrt_lock); + rcu_read_unlock(); return -EADDRNOTAVAIL; case SIOCGETSGCNT_IN6: if (copy_from_user(&sr, arg, sizeof(sr))) @@ -2038,8 +2038,8 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, #ifdef CONFIG_IPV6_PIMSM_V2 if (vif->flags & MIFF_REGISTER) { - vif->pkt_out++; - vif->bytes_out += skb->len; + WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); + WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); vif_dev->stats.tx_bytes += skb->len; vif_dev->stats.tx_packets++; rcu_read_lock(); @@ -2077,8 +2077,8 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, * result in receiving multiple packets. */ skb->dev = vif_dev; - vif->pkt_out++; - vif->bytes_out += skb->len; + WRITE_ONCE(vif->pkt_out, vif->pkt_out + 1); + WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); /* We are about to write */ /* XXX: extension headers? */ @@ -2168,8 +2168,10 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, } forward: - mrt->vif_table[vif].pkt_in++; - mrt->vif_table[vif].bytes_in += skb->len; + WRITE_ONCE(mrt->vif_table[vif].pkt_in, + mrt->vif_table[vif].pkt_in + 1); + WRITE_ONCE(mrt->vif_table[vif].bytes_in, + mrt->vif_table[vif].bytes_in + skb->len); /* * Forward the frame -- cgit v1.2.3 From db9eb7c8ae34c4acdb8a94c3566d1b4f4b5e7b1c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:43 +0000 Subject: ip6mr: do not acquire mrt_lock before calling ip6mr_cache_unresolved rcu_read_lock() protection is good enough. ip6mr_cache_unresolved() uses a dedicated spinlock (mfc_unres_lock) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index b4ad606e24bd..089c26af3120 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -730,7 +730,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, if (VIF_EXISTS(mrt, tmp)) break; } - mrt->maxvif = tmp + 1; + WRITE_ONCE(mrt->maxvif, tmp + 1); } write_unlock_bh(&mrt_lock); @@ -927,7 +927,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, WRITE_ONCE(mrt->mroute_reg_vif_num, vifi); #endif if (vifi + 1 > mrt->maxvif) - mrt->maxvif = vifi + 1; + WRITE_ONCE(mrt->maxvif, vifi + 1); write_unlock_bh(&mrt_lock); call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, vifi, mrt->id); @@ -2099,11 +2099,13 @@ out_free: return 0; } +/* Called with mrt_lock or rcu_read_lock() */ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) { int ct; - for (ct = mrt->maxvif - 1; ct >= 0; ct--) { + /* Pairs with WRITE_ONCE() in mif6_delete()/mif6_add() */ + for (ct = READ_ONCE(mrt->maxvif) - 1; ct >= 0; ct--) { if (rcu_access_pointer(mrt->vif_table[ct].dev) == dev) break; } @@ -2249,7 +2251,6 @@ int ip6_mr_input(struct sk_buff *skb) return err; } - read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); if (!cache) { @@ -2270,15 +2271,14 @@ int ip6_mr_input(struct sk_buff *skb) vif = ip6mr_find_vif(mrt, dev); if (vif >= 0) { int err = ip6mr_cache_unresolved(mrt, vif, skb, dev); - read_unlock(&mrt_lock); return err; } - read_unlock(&mrt_lock); kfree_skb(skb); return -ENODEV; } + read_lock(&mrt_lock); ip6_mr_forward(net, mrt, dev, skb, cache); read_unlock(&mrt_lock); -- cgit v1.2.3 From 9b1c21d898fdcab75c47c9827b6ded526eb33a18 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:44 +0000 Subject: ip6mr: do not acquire mrt_lock while calling ip6_mr_forward() ip6_mr_forward() uses standard RCU protection already. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 089c26af3120..8a751a360203 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2042,9 +2042,7 @@ static int ip6mr_forward2(struct net *net, struct mr_table *mrt, WRITE_ONCE(vif->bytes_out, vif->bytes_out + skb->len); vif_dev->stats.tx_bytes += skb->len; vif_dev->stats.tx_packets++; - rcu_read_lock(); ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT); - rcu_read_unlock(); goto out_free; } #endif @@ -2112,6 +2110,7 @@ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) return ct; } +/* Called under rcu_read_lock() */ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, struct mfc6_cache *c) @@ -2131,14 +2130,12 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, /* For an (*,G) entry, we only check that the incoming * interface is part of the static tree. */ - rcu_read_lock(); cache_proxy = mr_mfc_find_any_parent(mrt, vif); if (cache_proxy && cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) { rcu_read_unlock(); goto forward; } - rcu_read_unlock(); } /* @@ -2159,12 +2156,10 @@ static void ip6_mr_forward(struct net *net, struct mr_table *mrt, c->_c.mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; - rcu_read_lock(); ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF); if (mrt->mroute_do_wrvifwhole) ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRMIFWHOLE); - rcu_read_unlock(); } goto dont_forward; } @@ -2278,11 +2273,8 @@ int ip6_mr_input(struct sk_buff *skb) return -ENODEV; } - read_lock(&mrt_lock); ip6_mr_forward(net, mrt, dev, skb, cache); - read_unlock(&mrt_lock); - return 0; } -- cgit v1.2.3 From 6fa40a29021976c91b6ad720285bdfd4410aaa34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:45 +0000 Subject: ip6mr: switch ip6mr_get_route() to rcu_read_lock() Like ipmr_get_route(), we can use standard RCU here. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8a751a360203..08ac177fe30c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2290,7 +2290,7 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, if (!mrt) return -ENOENT; - read_lock(&mrt_lock); + rcu_read_lock(); cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr); if (!cache && skb->dev) { int vif = ip6mr_find_vif(mrt, skb->dev); @@ -2308,14 +2308,14 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, dev = skb->dev; if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { - read_unlock(&mrt_lock); + rcu_read_unlock(); return -ENODEV; } /* really correct? */ skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC); if (!skb2) { - read_unlock(&mrt_lock); + rcu_read_unlock(); return -ENOMEM; } @@ -2338,13 +2338,13 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm, iph->daddr = rt->rt6i_dst.addr; err = ip6mr_cache_unresolved(mrt, vif, skb2, dev); - read_unlock(&mrt_lock); + rcu_read_unlock(); return err; } err = mr_fill_mroute(mrt, skb, &cache->_c, rtm); - read_unlock(&mrt_lock); + rcu_read_unlock(); return err; } -- cgit v1.2.3 From 194366b28b8306b7a24596c57c09635ab2891252 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:46 +0000 Subject: ipmr: adopt rcu_read_lock() in mr_dump() We no longer need to acquire mrt_lock() in mr_dump, using rcu_read_lock() is enough. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/mroute_base.h | 4 ++-- net/ipv4/ipmr.c | 2 +- net/ipv4/ipmr_base.c | 8 +++----- net/ipv6/ip6mr.c | 2 +- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index 10d1e4fb4e9f..9dd4bf157255 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -308,7 +308,7 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock, struct netlink_ext_ack *extack); + struct netlink_ext_ack *extack); #else static inline void vif_device_init(struct vif_device *v, struct net_device *dev, @@ -363,7 +363,7 @@ static inline int mr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock, struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack) { return -EINVAL; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 69ccd3d7c655..38963b8de7af 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -3027,7 +3027,7 @@ static int ipmr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IPMR, ipmr_rules_dump, - ipmr_mr_table_iter, &mrt_lock, extack); + ipmr_mr_table_iter, extack); } static const struct fib_notifier_ops ipmr_notifier_ops_template = { diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 59f62b938472..271dc03fc6db 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -399,7 +399,6 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, struct netlink_ext_ack *extack), struct mr_table *(*mr_iter)(struct net *net, struct mr_table *mrt), - rwlock_t *mrt_lock, struct netlink_ext_ack *extack) { struct mr_table *mrt; @@ -416,10 +415,9 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int vifi; /* Notifiy on table VIF entries */ - read_lock(mrt_lock); + rcu_read_lock(); for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { - vif_dev = rcu_dereference_check(v->dev, - lockdep_is_held(mrt_lock)); + vif_dev = rcu_dereference(v->dev); if (!vif_dev) continue; @@ -430,7 +428,7 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, if (err) break; } - read_unlock(mrt_lock); + rcu_read_unlock(); if (err) return err; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 08ac177fe30c..f0a9bceb8e3c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1267,7 +1267,7 @@ static int ip6mr_dump(struct net *net, struct notifier_block *nb, struct netlink_ext_ack *extack) { return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump, - ip6mr_mr_table_iter, &mrt_lock, extack); + ip6mr_mr_table_iter, extack); } static struct notifier_block ip6_mr_notifier = { -- cgit v1.2.3 From b96ef16d2f837870daaea51c38cd50458b95ad5c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:47 +0000 Subject: ipmr: convert /proc handlers to rcu_read_lock() We can use standard rcu_read_lock(), to get rid of last read_lock(&mrt_lock) call points. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 8 ++++---- net/ipv6/ip6mr.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 38963b8de7af..2e39f73fe81a 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2895,7 +2895,7 @@ out: */ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(mrt_lock) + __acquires(RCU) { struct mr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -2907,14 +2907,14 @@ static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) iter->mrt = mrt; - read_lock(&mrt_lock); + rcu_read_lock(); return mr_vif_seq_start(seq, pos); } static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) - __releases(mrt_lock) + __releases(RCU) { - read_unlock(&mrt_lock); + rcu_read_unlock(); } static int ipmr_vif_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f0a9bceb8e3c..7381cfdac3e3 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -404,7 +404,7 @@ static void ip6mr_free_table(struct mr_table *mrt) */ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(mrt_lock) + __acquires(RCU) { struct mr_vif_iter *iter = seq->private; struct net *net = seq_file_net(seq); @@ -416,14 +416,14 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) iter->mrt = mrt; - read_lock(&mrt_lock); + rcu_read_lock(); return mr_vif_seq_start(seq, pos); } static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v) - __releases(mrt_lock) + __releases(RCU) { - read_unlock(&mrt_lock); + rcu_read_unlock(); } static int ip6mr_vif_seq_show(struct seq_file *seq, void *v) -- cgit v1.2.3 From 3f55211ecf6a8fb271565adcb704cd57a8e4547c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:48 +0000 Subject: ipmr: convert mrt_lock to a spinlock mrt_lock is only held in write mode, from process context only. We can switch to a mere spinlock, and avoid blocking BH. Also, vif_dev_read() is always called under standard rcu_read_lock(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 2e39f73fe81a..f095b6c8100b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -77,12 +77,11 @@ struct ipmr_result { * Note that the changes are semaphored via rtnl_lock. */ -static DEFINE_RWLOCK(mrt_lock); +static DEFINE_SPINLOCK(mrt_lock); static struct net_device *vif_dev_read(const struct vif_device *vif) { - return rcu_dereference_check(vif->dev, - lockdep_is_held(&mrt_lock)); + return rcu_dereference(vif->dev); } /* Multicast router control variables */ @@ -664,7 +663,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, if (!dev) return -EADDRNOTAVAIL; - write_lock_bh(&mrt_lock); + spin_lock(&mrt_lock); call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, dev, vifi, mrt->id); RCU_INIT_POINTER(v->dev, NULL); @@ -683,7 +682,7 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, WRITE_ONCE(mrt->maxvif, tmp + 1); } - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); dev_set_allmulti(dev, -1); @@ -785,7 +784,7 @@ out: spin_unlock(&mfc_unres_lock); } -/* Fill oifs list. It is called under write locked mrt_lock. */ +/* Fill oifs list. It is called under locked mrt_lock. */ static void ipmr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, unsigned char *ttls) { @@ -897,7 +896,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, v->remote = vifc->vifc_rmt_addr.s_addr; /* And finish update writing critical data */ - write_lock_bh(&mrt_lock); + spin_lock(&mrt_lock); rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); if (v->flags & VIFF_REGISTER) { @@ -906,7 +905,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, } if (vifi+1 > mrt->maxvif) WRITE_ONCE(mrt->maxvif, vifi + 1); - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, vifi, mrt->id); return 0; @@ -1211,12 +1210,12 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, mfc->mfcc_mcastgrp.s_addr, parent); rcu_read_unlock(); if (c) { - write_lock_bh(&mrt_lock); + spin_lock(&mrt_lock); c->_c.mfc_parent = mfc->mfcc_parent; ipmr_update_thresholds(mrt, &c->_c, mfc->mfcc_ttls); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, mrt->id); mroute_netlink_event(mrt, c, RTM_NEWROUTE); -- cgit v1.2.3 From a96f7a6a60b310fdb7913d5ae70961eeeca9527d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 23 Jun 2022 04:34:49 +0000 Subject: ip6mr: convert mrt_lock to a spinlock mrt_lock is only held in write mode, from process context only. We can switch to a mere spinlock, and avoid blocking BH. Also, vif_dev_read() is always called under standard rcu_read_lock(). Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6mr.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7381cfdac3e3..ec6e1509fc7c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -62,12 +62,11 @@ struct ip6mr_result { Note that the changes are semaphored via rtnl_lock. */ -static DEFINE_RWLOCK(mrt_lock); +static DEFINE_SPINLOCK(mrt_lock); static struct net_device *vif_dev_read(const struct vif_device *vif) { - return rcu_dereference_check(vif->dev, - lockdep_is_held(&mrt_lock)); + return rcu_dereference(vif->dev); } /* Multicast router control variables */ @@ -714,7 +713,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net), FIB_EVENT_VIF_DEL, v, dev, vifi, mrt->id); - write_lock_bh(&mrt_lock); + spin_lock(&mrt_lock); RCU_INIT_POINTER(v->dev, NULL); #ifdef CONFIG_IPV6_PIMSM_V2 @@ -733,7 +732,7 @@ static int mif6_delete(struct mr_table *mrt, int vifi, int notify, WRITE_ONCE(mrt->maxvif, tmp + 1); } - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); dev_set_allmulti(dev, -1); @@ -833,7 +832,7 @@ static void ipmr_expire_process(struct timer_list *t) spin_unlock(&mfc_unres_lock); } -/* Fill oifs list. It is called under write locked mrt_lock. */ +/* Fill oifs list. It is called under locked mrt_lock. */ static void ip6mr_update_thresholds(struct mr_table *mrt, struct mr_mfc *cache, @@ -919,7 +918,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, MIFF_REGISTER); /* And finish update writing critical data */ - write_lock_bh(&mrt_lock); + spin_lock(&mrt_lock); rcu_assign_pointer(v->dev, dev); netdev_tracker_alloc(dev, &v->dev_tracker, GFP_ATOMIC); #ifdef CONFIG_IPV6_PIMSM_V2 @@ -928,7 +927,7 @@ static int mif6_add(struct net *net, struct mr_table *mrt, #endif if (vifi + 1 > mrt->maxvif) WRITE_ONCE(mrt->maxvif, vifi + 1); - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, dev, vifi, mrt->id); return 0; @@ -1442,12 +1441,12 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt, &mfc->mf6cc_mcastgrp.sin6_addr, parent); rcu_read_unlock(); if (c) { - write_lock_bh(&mrt_lock); + spin_lock(&mrt_lock); c->_c.mfc_parent = mfc->mf6cc_parent; ip6mr_update_thresholds(mrt, &c->_c, ttls); if (!mrtsock) c->_c.mfc_flags |= MFC_STATIC; - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, mrt->id); mr6_netlink_event(mrt, c, RTM_NEWROUTE); @@ -1565,7 +1564,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) struct net *net = sock_net(sk); rtnl_lock(); - write_lock_bh(&mrt_lock); + spin_lock(&mrt_lock); if (rtnl_dereference(mrt->mroute_sk)) { err = -EADDRINUSE; } else { @@ -1573,7 +1572,7 @@ static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk) sock_set_flag(sk, SOCK_RCU_FREE); atomic_inc(&net->ipv6.devconf_all->mc_forwarding); } - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); if (!err) inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, @@ -1603,14 +1602,14 @@ int ip6mr_sk_done(struct sock *sk) rtnl_lock(); ip6mr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { - write_lock_bh(&mrt_lock); + spin_lock(&mrt_lock); RCU_INIT_POINTER(mrt->mroute_sk, NULL); /* Note that mroute_sk had SOCK_RCU_FREE set, * so the RCU grace period before sk freeing * is guaranteed by sk_destruct() */ atomic_dec(&devconf->mc_forwarding); - write_unlock_bh(&mrt_lock); + spin_unlock(&mrt_lock); inet6_netconf_notify_devconf(net, RTM_NEWNETCONF, NETCONFA_MC_FORWARDING, NETCONFA_IFINDEX_ALL, @@ -2097,7 +2096,7 @@ out_free: return 0; } -/* Called with mrt_lock or rcu_read_lock() */ +/* Called with rcu_read_lock() */ static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev) { int ct; -- cgit v1.2.3