diff options
Diffstat (limited to 'net/tipc')
-rw-r--r-- | net/tipc/Makefile | 2 | ||||
-rw-r--r-- | net/tipc/bcast.c | 30 | ||||
-rw-r--r-- | net/tipc/bearer.c | 87 | ||||
-rw-r--r-- | net/tipc/bearer.h | 4 | ||||
-rw-r--r-- | net/tipc/core.h | 6 | ||||
-rw-r--r-- | net/tipc/discover.c | 6 | ||||
-rw-r--r-- | net/tipc/group.c | 920 | ||||
-rw-r--r-- | net/tipc/group.h | 75 | ||||
-rw-r--r-- | net/tipc/link.c | 36 | ||||
-rw-r--r-- | net/tipc/monitor.c | 23 | ||||
-rw-r--r-- | net/tipc/msg.c | 82 | ||||
-rw-r--r-- | net/tipc/msg.h | 138 | ||||
-rw-r--r-- | net/tipc/name_table.c | 211 | ||||
-rw-r--r-- | net/tipc/name_table.h | 37 | ||||
-rw-r--r-- | net/tipc/net.c | 15 | ||||
-rw-r--r-- | net/tipc/net.h | 1 | ||||
-rw-r--r-- | net/tipc/netlink_compat.c | 43 | ||||
-rw-r--r-- | net/tipc/node.c | 78 | ||||
-rw-r--r-- | net/tipc/node.h | 5 | ||||
-rw-r--r-- | net/tipc/server.c | 184 | ||||
-rw-r--r-- | net/tipc/server.h | 14 | ||||
-rw-r--r-- | net/tipc/socket.c | 898 | ||||
-rw-r--r-- | net/tipc/subscr.c | 41 | ||||
-rw-r--r-- | net/tipc/subscr.h | 2 | ||||
-rw-r--r-- | net/tipc/udp_media.c | 4 |
25 files changed, 2419 insertions, 523 deletions
diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 2bfaa9d4b403..37bb0bfbd936 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -9,7 +9,7 @@ tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o monitor.o name_table.o net.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ - server.o socket.o + server.o socket.o group.o tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index a140dd4a84af..37892b3909af 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -1,7 +1,7 @@ /* * net/tipc/bcast.c: TIPC broadcast code * - * Copyright (c) 2004-2006, 2014-2016, Ericsson AB + * Copyright (c) 2004-2006, 2014-2017, Ericsson AB * Copyright (c) 2004, Intel Corporation. * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. @@ -42,8 +42,8 @@ #include "link.h" #include "name_table.h" -#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ -#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ +#define BCLINK_WIN_DEFAULT 50 /* bcast link window size (default) */ +#define BCLINK_WIN_MIN 32 /* bcast minimum link window size */ const char tipc_bclink_name[] = "broadcast-link"; @@ -74,6 +74,10 @@ static struct tipc_bc_base *tipc_bc_base(struct net *net) return tipc_net(net)->bcbase; } +/* tipc_bcast_get_mtu(): -get the MTU currently used by broadcast link + * Note: the MTU is decremented to give room for a tunnel header, in + * case the message needs to be sent as replicast + */ int tipc_bcast_get_mtu(struct net *net) { return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE; @@ -258,20 +262,20 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_nlist *dests, u16 *cong_link_cnt) { + struct tipc_dest *dst, *tmp; struct sk_buff_head _pkts; - struct u32_item *n, *tmp; - u32 dst, selector; + u32 dnode, selector; selector = msg_link_selector(buf_msg(skb_peek(pkts))); skb_queue_head_init(&_pkts); - list_for_each_entry_safe(n, tmp, &dests->list, list) { - dst = n->value; - if (!tipc_msg_pskb_copy(dst, pkts, &_pkts)) + list_for_each_entry_safe(dst, tmp, &dests->list, list) { + dnode = dst->node; + if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts)) return -ENOMEM; /* Any other return value than -ELINKCONG is ignored */ - if (tipc_node_xmit(net, &_pkts, dst, selector) == -ELINKCONG) + if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG) (*cong_link_cnt)++; } return 0; @@ -515,7 +519,7 @@ int tipc_bcast_init(struct net *net) spin_lock_init(&tipc_net(net)->bclock); if (!tipc_link_bc_create(net, 0, 0, - U16_MAX, + FB_MTU, BCLINK_WIN_DEFAULT, 0, &bb->inputq, @@ -554,7 +558,7 @@ void tipc_nlist_add(struct tipc_nlist *nl, u32 node) { if (node == nl->self) nl->local = true; - else if (u32_push(&nl->list, node)) + else if (tipc_dest_push(&nl->list, node, 0)) nl->remote++; } @@ -562,13 +566,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node) { if (node == nl->self) nl->local = false; - else if (u32_del(&nl->list, node)) + else if (tipc_dest_del(&nl->list, node, 0)) nl->remote--; } void tipc_nlist_purge(struct tipc_nlist *nl) { - u32_list_purge(&nl->list); + tipc_dest_list_purge(&nl->list); nl->remote = 0; nl->local = 0; } diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 47ec121574ce..3e3dce3d4c63 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -324,6 +324,7 @@ restart: if (res) { pr_warn("Bearer <%s> rejected, enable failure (%d)\n", name, -res); + kfree(b); return -EINVAL; } @@ -347,8 +348,10 @@ restart: if (skb) tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); - if (tipc_mon_create(net, bearer_id)) + if (tipc_mon_create(net, bearer_id)) { + bearer_disable(net, b); return -ENOMEM; + } pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, @@ -810,7 +813,7 @@ err_out: return err; } -int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) +int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) { int err; char *name; @@ -832,20 +835,27 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); - rtnl_lock(); bearer = tipc_bearer_find(net, name); - if (!bearer) { - rtnl_unlock(); + if (!bearer) return -EINVAL; - } bearer_disable(net, bearer); - rtnl_unlock(); return 0; } -int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) +int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_bearer_disable(skb, info); + rtnl_unlock(); + + return err; +} + +int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { int err; char *bearer; @@ -887,15 +897,18 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) prio = nla_get_u32(props[TIPC_NLA_PROP_PRIO]); } + return tipc_enable_bearer(net, bearer, domain, prio, attrs); +} + +int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) +{ + int err; + rtnl_lock(); - err = tipc_enable_bearer(net, bearer, domain, prio, attrs); - if (err) { - rtnl_unlock(); - return err; - } + err = __tipc_nl_bearer_enable(skb, info); rtnl_unlock(); - return 0; + return err; } int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) @@ -941,7 +954,7 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) return 0; } -int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) +int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { int err; char *name; @@ -962,22 +975,17 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) return -EINVAL; name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); - rtnl_lock(); b = tipc_bearer_find(net, name); - if (!b) { - rtnl_unlock(); + if (!b) return -EINVAL; - } if (attrs[TIPC_NLA_BEARER_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_BEARER_PROP], props); - if (err) { - rtnl_unlock(); + if (err) return err; - } if (props[TIPC_NLA_PROP_TOL]) b->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); @@ -986,11 +994,21 @@ int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_WIN]) b->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); } - rtnl_unlock(); return 0; } +int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_bearer_set(skb, info); + rtnl_unlock(); + + return err; +} + static int __tipc_nl_add_media(struct tipc_nl_msg *msg, struct tipc_media *media, int nlflags) { @@ -1112,7 +1130,7 @@ err_out: return err; } -int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) +int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) { int err; char *name; @@ -1130,22 +1148,17 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) return -EINVAL; name = nla_data(attrs[TIPC_NLA_MEDIA_NAME]); - rtnl_lock(); m = tipc_media_find(name); - if (!m) { - rtnl_unlock(); + if (!m) return -EINVAL; - } if (attrs[TIPC_NLA_MEDIA_PROP]) { struct nlattr *props[TIPC_NLA_PROP_MAX + 1]; err = tipc_nl_parse_link_prop(attrs[TIPC_NLA_MEDIA_PROP], props); - if (err) { - rtnl_unlock(); + if (err) return err; - } if (props[TIPC_NLA_PROP_TOL]) m->tolerance = nla_get_u32(props[TIPC_NLA_PROP_TOL]); @@ -1154,7 +1167,17 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) if (props[TIPC_NLA_PROP_WIN]) m->window = nla_get_u32(props[TIPC_NLA_PROP_WIN]); } - rtnl_unlock(); return 0; } + +int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_media_set(skb, info); + rtnl_unlock(); + + return err; +} diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 42d6eeeb646d..a53613d95bc9 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -188,15 +188,19 @@ extern struct tipc_media udp_media_info; #endif int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info); int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info); int tipc_media_set_priority(const char *name, u32 new_value); int tipc_media_set_window(const char *name, u32 new_value); diff --git a/net/tipc/core.h b/net/tipc/core.h index 5cc5398be722..20b21af2ff14 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -49,7 +49,6 @@ #include <linux/uaccess.h> #include <linux/interrupt.h> #include <linux/atomic.h> -#include <asm/hardirq.h> #include <linux/netdevice.h> #include <linux/in.h> #include <linux/list.h> @@ -132,6 +131,11 @@ static inline struct list_head *tipc_nodes(struct net *net) return &tipc_net(net)->node_list; } +static inline struct tipc_server *tipc_topsrv(struct net *net) +{ + return tipc_net(net)->topsrv; +} + static inline unsigned int tipc_hashfn(u32 addr) { return addr & (NODE_HTABLE_SIZE - 1); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 02462d67d191..92e4828c6b09 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -224,9 +224,9 @@ void tipc_disc_remove_dest(struct tipc_link_req *req) * * Called whenever a link setup request timer associated with a bearer expires. */ -static void disc_timeout(unsigned long data) +static void disc_timeout(struct timer_list *t) { - struct tipc_link_req *req = (struct tipc_link_req *)data; + struct tipc_link_req *req = from_timer(req, t, timer); struct sk_buff *skb; int max_delay; @@ -292,7 +292,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); - setup_timer(&req->timer, disc_timeout, (unsigned long)req); + timer_setup(&req->timer, disc_timeout, 0); mod_timer(&req->timer, jiffies + req->timer_intv); b->link_req = req; *skb = skb_clone(req->buf, GFP_ATOMIC); diff --git a/net/tipc/group.c b/net/tipc/group.c new file mode 100644 index 000000000000..04e516d18054 --- /dev/null +++ b/net/tipc/group.c @@ -0,0 +1,920 @@ +/* + * net/tipc/group.c: TIPC group messaging code + * + * Copyright (c) 2017, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "addr.h" +#include "group.h" +#include "bcast.h" +#include "server.h" +#include "msg.h" +#include "socket.h" +#include "node.h" +#include "name_table.h" +#include "subscr.h" + +#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1) +#define ADV_IDLE ADV_UNIT +#define ADV_ACTIVE (ADV_UNIT * 12) + +enum mbr_state { + MBR_JOINING, + MBR_PUBLISHED, + MBR_JOINED, + MBR_PENDING, + MBR_ACTIVE, + MBR_RECLAIMING, + MBR_REMITTED, + MBR_LEAVING +}; + +struct tipc_member { + struct rb_node tree_node; + struct list_head list; + struct list_head small_win; + struct sk_buff_head deferredq; + struct tipc_group *group; + u32 node; + u32 port; + u32 instance; + enum mbr_state state; + u16 advertised; + u16 window; + u16 bc_rcv_nxt; + u16 bc_syncpt; + u16 bc_acked; +}; + +struct tipc_group { + struct rb_root members; + struct list_head small_win; + struct list_head pending; + struct list_head active; + struct tipc_nlist dests; + struct net *net; + int subid; + u32 type; + u32 instance; + u32 scope; + u32 portid; + u16 member_cnt; + u16 active_cnt; + u16 max_active; + u16 bc_snd_nxt; + u16 bc_ackers; + bool *open; + bool loopback; + bool events; +}; + +static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, + int mtyp, struct sk_buff_head *xmitq); + +static void tipc_group_open(struct tipc_member *m, bool *wakeup) +{ + *wakeup = false; + if (list_empty(&m->small_win)) + return; + list_del_init(&m->small_win); + *m->group->open = true; + *wakeup = true; +} + +static void tipc_group_decr_active(struct tipc_group *grp, + struct tipc_member *m) +{ + if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING || + m->state == MBR_REMITTED) + grp->active_cnt--; +} + +static int tipc_group_rcvbuf_limit(struct tipc_group *grp) +{ + int max_active, active_pool, idle_pool; + int mcnt = grp->member_cnt + 1; + + /* Limit simultaneous reception from other members */ + max_active = min(mcnt / 8, 64); + max_active = max(max_active, 16); + grp->max_active = max_active; + + /* Reserve blocks for active and idle members */ + active_pool = max_active * ADV_ACTIVE; + idle_pool = (mcnt - max_active) * ADV_IDLE; + + /* Scale to bytes, considering worst-case truesize/msgsize ratio */ + return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4; +} + +u16 tipc_group_bc_snd_nxt(struct tipc_group *grp) +{ + return grp->bc_snd_nxt; +} + +static bool tipc_group_is_receiver(struct tipc_member *m) +{ + return m && m->state != MBR_JOINING && m->state != MBR_LEAVING; +} + +static bool tipc_group_is_sender(struct tipc_member *m) +{ + return m && m->state != MBR_JOINING && m->state != MBR_PUBLISHED; +} + +u32 tipc_group_exclude(struct tipc_group *grp) +{ + if (!grp->loopback) + return grp->portid; + return 0; +} + +int tipc_group_size(struct tipc_group *grp) +{ + return grp->member_cnt; +} + +struct tipc_group *tipc_group_create(struct net *net, u32 portid, + struct tipc_group_req *mreq, + bool *group_is_open) +{ + u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS; + bool global = mreq->scope != TIPC_NODE_SCOPE; + struct tipc_group *grp; + u32 type = mreq->type; + + grp = kzalloc(sizeof(*grp), GFP_ATOMIC); + if (!grp) + return NULL; + tipc_nlist_init(&grp->dests, tipc_own_addr(net)); + INIT_LIST_HEAD(&grp->small_win); + INIT_LIST_HEAD(&grp->active); + INIT_LIST_HEAD(&grp->pending); + grp->members = RB_ROOT; + grp->net = net; + grp->portid = portid; + grp->type = type; + grp->instance = mreq->instance; + grp->scope = mreq->scope; + grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; + grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; + grp->open = group_is_open; + *grp->open = false; + filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE; + if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, + filter, &grp->subid)) + return grp; + kfree(grp); + return NULL; +} + +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf) +{ + struct rb_root *tree = &grp->members; + struct tipc_member *m, *tmp; + struct sk_buff_head xmitq; + + skb_queue_head_init(&xmitq); + rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq); + tipc_group_update_member(m, 0); + } + tipc_node_distr_xmit(net, &xmitq); + *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); +} + +void tipc_group_delete(struct net *net, struct tipc_group *grp) +{ + struct rb_root *tree = &grp->members; + struct tipc_member *m, *tmp; + struct sk_buff_head xmitq; + + __skb_queue_head_init(&xmitq); + + rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { + tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq); + list_del(&m->list); + kfree(m); + } + tipc_node_distr_xmit(net, &xmitq); + tipc_nlist_purge(&grp->dests); + tipc_topsrv_kern_unsubscr(net, grp->subid); + kfree(grp); +} + +struct tipc_member *tipc_group_find_member(struct tipc_group *grp, + u32 node, u32 port) +{ + struct rb_node *n = grp->members.rb_node; + u64 nkey, key = (u64)node << 32 | port; + struct tipc_member *m; + + while (n) { + m = container_of(n, struct tipc_member, tree_node); + nkey = (u64)m->node << 32 | m->port; + if (key < nkey) + n = n->rb_left; + else if (key > nkey) + n = n->rb_right; + else + return m; + } + return NULL; +} + +static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp, + u32 node, u32 port) +{ + struct tipc_member *m; + + m = tipc_group_find_member(grp, node, port); + if (m && tipc_group_is_receiver(m)) + return m; + return NULL; +} + +static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, + u32 node) +{ + struct tipc_member *m; + struct rb_node *n; + + for (n = rb_first(&grp->members); n; n = rb_next(n)) { + m = container_of(n, struct tipc_member, tree_node); + if (m->node == node) + return m; + } + return NULL; +} + +static void tipc_group_add_to_tree(struct tipc_group *grp, + struct tipc_member *m) +{ + u64 nkey, key = (u64)m->node << 32 | m->port; + struct rb_node **n, *parent = NULL; + struct tipc_member *tmp; + + n = &grp->members.rb_node; + while (*n) { + tmp = container_of(*n, struct tipc_member, tree_node); + parent = *n; + tmp = container_of(parent, struct tipc_member, tree_node); + nkey = (u64)tmp->node << 32 | tmp->port; + if (key < nkey) + n = &(*n)->rb_left; + else if (key > nkey) + n = &(*n)->rb_right; + else + return; + } + rb_link_node(&m->tree_node, parent, n); + rb_insert_color(&m->tree_node, &grp->members); +} + +static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, + u32 node, u32 port, + u32 instance, int state) +{ + struct tipc_member *m; + + m = kzalloc(sizeof(*m), GFP_ATOMIC); + if (!m) + return NULL; + INIT_LIST_HEAD(&m->list); + INIT_LIST_HEAD(&m->small_win); + __skb_queue_head_init(&m->deferredq); + m->group = grp; + m->node = node; + m->port = port; + m->instance = instance; + m->bc_acked = grp->bc_snd_nxt - 1; + grp->member_cnt++; + tipc_group_add_to_tree(grp, m); + tipc_nlist_add(&grp->dests, m->node); + m->state = state; + return m; +} + +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance) +{ + tipc_group_create_member(grp, node, port, instance, MBR_PUBLISHED); +} + +static void tipc_group_delete_member(struct tipc_group *grp, + struct tipc_member *m) +{ + rb_erase(&m->tree_node, &grp->members); + grp->member_cnt--; + + /* Check if we were waiting for replicast ack from this member */ + if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1)) + grp->bc_ackers--; + + list_del_init(&m->list); + list_del_init(&m->small_win); + tipc_group_decr_active(grp, m); + + /* If last member on a node, remove node from dest list */ + if (!tipc_group_find_node(grp, m->node)) + tipc_nlist_del(&grp->dests, m->node); + + kfree(m); +} + +struct tipc_nlist *tipc_group_dests(struct tipc_group *grp) +{ + return &grp->dests; +} + +void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, + int *scope) +{ + seq->type = grp->type; + seq->lower = grp->instance; + seq->upper = grp->instance; + *scope = grp->scope; +} + +void tipc_group_update_member(struct tipc_member *m, int len) +{ + struct tipc_group *grp = m->group; + struct tipc_member *_m, *tmp; + + if (!tipc_group_is_receiver(m)) + return; + + m->window -= len; + + if (m->window >= ADV_IDLE) + return; + + list_del_init(&m->small_win); + + /* Sort member into small_window members' list */ + list_for_each_entry_safe(_m, tmp, &grp->small_win, small_win) { + if (_m->window > m->window) + break; + } + list_add_tail(&m->small_win, &_m->small_win); +} + +void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) +{ + u16 prev = grp->bc_snd_nxt - 1; + struct tipc_member *m; + struct rb_node *n; + u16 ackers = 0; + + for (n = rb_first(&grp->members); n; n = rb_next(n)) { + m = container_of(n, struct tipc_member, tree_node); + if (tipc_group_is_receiver(m)) { + tipc_group_update_member(m, len); + m->bc_acked = prev; + ackers++; + } + } + + /* Mark number of acknowledges to expect, if any */ + if (ack) + grp->bc_ackers = ackers; + grp->bc_snd_nxt++; +} + +bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, + int len, struct tipc_member **mbr) +{ + struct sk_buff_head xmitq; + struct tipc_member *m; + int adv, state; + + m = tipc_group_find_dest(grp, dnode, dport); + if (!tipc_group_is_receiver(m)) { + *mbr = NULL; + return false; + } + *mbr = m; + + if (m->window >= len) + return false; + + *grp->open = false; + + /* If not fully advertised, do it now to prevent mutual blocking */ + adv = m->advertised; + state = m->state; + if (state == MBR_JOINED && adv == ADV_IDLE) + return true; + if (state == MBR_ACTIVE && adv == ADV_ACTIVE) + return true; + if (state == MBR_PENDING && adv == ADV_IDLE) + return true; + skb_queue_head_init(&xmitq); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq); + tipc_node_distr_xmit(grp->net, &xmitq); + return true; +} + +bool tipc_group_bc_cong(struct tipc_group *grp, int len) +{ + struct tipc_member *m = NULL; + + /* If prev bcast was replicast, reject until all receivers have acked */ + if (grp->bc_ackers) { + *grp->open = false; + return true; + } + if (list_empty(&grp->small_win)) + return false; + + m = list_first_entry(&grp->small_win, struct tipc_member, small_win); + if (m->window >= len) + return false; + + return tipc_group_cong(grp, m->node, m->port, len, &m); +} + +/* tipc_group_sort_msg() - sort msg into queue by bcast sequence number + */ +static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq) +{ + struct tipc_msg *_hdr, *hdr = buf_msg(skb); + u16 bc_seqno = msg_grp_bc_seqno(hdr); + struct sk_buff *_skb, *tmp; + int mtyp = msg_type(hdr); + + /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */ + if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) { + skb_queue_walk_safe(defq, _skb, tmp) { + _hdr = buf_msg(_skb); + if (!less(bc_seqno, msg_grp_bc_seqno(_hdr))) + continue; + __skb_queue_before(defq, _skb, skb); + return; + } + /* Bcast was not bypassed, - add to tail */ + } + /* Unicasts are never bypassed, - always add to tail */ + __skb_queue_tail(defq, skb); +} + +/* tipc_group_filter_msg() - determine if we should accept arriving message + */ +void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb = __skb_dequeue(inputq); + bool ack, deliver, update, leave = false; + struct sk_buff_head *defq; + struct tipc_member *m; + struct tipc_msg *hdr; + u32 node, port; + int mtyp, blks; + + if (!skb) + return; + + hdr = buf_msg(skb); + node = msg_orignode(hdr); + port = msg_origport(hdr); + + if (!msg_in_group(hdr)) + goto drop; + + m = tipc_group_find_member(grp, node, port); + if (!tipc_group_is_sender(m)) + goto drop; + + if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) + goto drop; + + TIPC_SKB_CB(skb)->orig_member = m->instance; + defq = &m->deferredq; + tipc_group_sort_msg(skb, defq); + + while ((skb = skb_peek(defq))) { + hdr = buf_msg(skb); + mtyp = msg_type(hdr); + blks = msg_blocks(hdr); + deliver = true; + ack = false; + update = false; + + if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) + break; + + /* Decide what to do with message */ + switch (mtyp) { + case TIPC_GRP_MCAST_MSG: + if (msg_nameinst(hdr) != grp->instance) { + update = true; + deliver = false; + } + /* Fall thru */ + case TIPC_GRP_BCAST_MSG: + m->bc_rcv_nxt++; + ack = msg_grp_bc_ack_req(hdr); + break; + case TIPC_GRP_UCAST_MSG: + break; + case TIPC_GRP_MEMBER_EVT: + if (m->state == MBR_LEAVING) + leave = true; + if (!grp->events) + deliver = false; + break; + default: + break; + } + + /* Execute decisions */ + __skb_dequeue(defq); + if (deliver) + __skb_queue_tail(inputq, skb); + else + kfree_skb(skb); + + if (ack) + tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq); + + if (leave) { + __skb_queue_purge(defq); + tipc_group_delete_member(grp, m); + break; + } + if (!update) + continue; + + tipc_group_update_rcv_win(grp, blks, node, port, xmitq); + } + return; +drop: + kfree_skb(skb); +} + +void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, + u32 port, struct sk_buff_head *xmitq) +{ + struct list_head *active = &grp->active; + int max_active = grp->max_active; + int reclaim_limit = max_active * 3 / 4; + int active_cnt = grp->active_cnt; + struct tipc_member *m, *rm, *pm; + + m = tipc_group_find_member(grp, node, port); + if (!m) + return; + + m->advertised -= blks; + + switch (m->state) { + case MBR_JOINED: + /* First, decide if member can go active */ + if (active_cnt <= max_active) { + m->state = MBR_ACTIVE; + list_add_tail(&m->list, active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + } else { + m->state = MBR_PENDING; + list_add_tail(&m->list, &grp->pending); + } + + if (active_cnt < reclaim_limit) + break; + + /* Reclaim from oldest active member, if possible */ + if (!list_empty(active)) { + rm = list_first_entry(active, struct tipc_member, list); + rm->state = MBR_RECLAIMING; + list_del_init(&rm->list); + tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq); + break; + } + /* Nobody to reclaim from; - revert oldest pending to JOINED */ + pm = list_first_entry(&grp->pending, struct tipc_member, list); + list_del_init(&pm->list); + pm->state = MBR_JOINED; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + break; + case MBR_ACTIVE: + if (!list_is_last(&m->list, &grp->active)) + list_move_tail(&m->list, &grp->active); + if (m->advertised > (ADV_ACTIVE * 3 / 4)) + break; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + break; + case MBR_REMITTED: + if (m->advertised > ADV_IDLE) + break; + m->state = MBR_JOINED; + grp->active_cnt--; + if (m->advertised < ADV_IDLE) { + pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + } + + if (list_empty(&grp->pending)) + return; + + /* Set oldest pending member to active and advertise */ + pm = list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + break; + case MBR_RECLAIMING: + case MBR_JOINING: + case MBR_LEAVING: + default: + break; + } +} + +static void tipc_group_create_event(struct tipc_group *grp, + struct tipc_member *m, + u32 event, u16 seqno, + struct sk_buff_head *inputq) +{ u32 dnode = tipc_own_addr(grp->net); + struct tipc_event evt; + struct sk_buff *skb; + struct tipc_msg *hdr; + + evt.event = event; + evt.found_lower = m->instance; + evt.found_upper = m->instance; + evt.port.ref = m->port; + evt.port.node = m->node; + evt.s.seq.type = grp->type; + evt.s.seq.lower = m->instance; + evt.s.seq.upper = m->instance; + + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_GRP_MEMBER_EVT, + GROUP_H_SIZE, sizeof(evt), dnode, m->node, + grp->portid, m->port, 0); + if (!skb) + return; + + hdr = buf_msg(skb); + msg_set_nametype(hdr, grp->type); + msg_set_grp_evt(hdr, event); + msg_set_dest_droppable(hdr, true); + msg_set_grp_bc_seqno(hdr, seqno); + memcpy(msg_data(hdr), &evt, sizeof(evt)); + TIPC_SKB_CB(skb)->orig_member = m->instance; + __skb_queue_tail(inputq, skb); +} + +static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, + int mtyp, struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr; + struct sk_buff *skb; + int adv = 0; + + skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0, + m->node, tipc_own_addr(grp->net), + m->port, grp->portid, 0); + if (!skb) + return; + + if (m->state == MBR_ACTIVE) + adv = ADV_ACTIVE - m->advertised; + else if (m->state == MBR_JOINED || m->state == MBR_PENDING) + adv = ADV_IDLE - m->advertised; + + hdr = buf_msg(skb); + + if (mtyp == GRP_JOIN_MSG) { + msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); + msg_set_adv_win(hdr, adv); + m->advertised += adv; + } else if (mtyp == GRP_LEAVE_MSG) { + msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); + } else if (mtyp == GRP_ADV_MSG) { + msg_set_adv_win(hdr, adv); + m->advertised += adv; + } else if (mtyp == GRP_ACK_MSG) { + msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt); + } else if (mtyp == GRP_REMIT_MSG) { + msg_set_grp_remitted(hdr, m->window); + } + msg_set_dest_droppable(hdr, true); + __skb_queue_tail(xmitq, skb); +} + +void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, + struct tipc_msg *hdr, struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + u32 node = msg_orignode(hdr); + u32 port = msg_origport(hdr); + struct tipc_member *m, *pm; + u16 remitted, in_flight; + + if (!grp) + return; + + if (grp->scope == TIPC_NODE_SCOPE && node != tipc_own_addr(grp->net)) + return; + + m = tipc_group_find_member(grp, node, port); + + switch (msg_type(hdr)) { + case GRP_JOIN_MSG: + if (!m) + m = tipc_group_create_member(grp, node, port, + 0, MBR_JOINING); + if (!m) + return; + m->bc_syncpt = msg_grp_bc_syncpt(hdr); + m->bc_rcv_nxt = m->bc_syncpt; + m->window += msg_adv_win(hdr); + + /* Wait until PUBLISH event is received if necessary */ + if (m->state != MBR_PUBLISHED) + return; + + /* Member can be taken into service */ + m->state = MBR_JOINED; + tipc_group_open(m, usr_wakeup); + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); + return; + case GRP_LEAVE_MSG: + if (!m) + return; + m->bc_syncpt = msg_grp_bc_syncpt(hdr); + list_del_init(&m->list); + tipc_group_open(m, usr_wakeup); + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_syncpt, inputq); + return; + case GRP_ADV_MSG: + if (!m) + return; + m->window += msg_adv_win(hdr); + tipc_group_open(m, usr_wakeup); + return; + case GRP_ACK_MSG: + if (!m) + return; + m->bc_acked = msg_grp_bc_acked(hdr); + if (--grp->bc_ackers) + return; + list_del_init(&m->small_win); + *m->group->open = true; + *usr_wakeup = true; + tipc_group_update_member(m, 0); + return; + case GRP_RECLAIM_MSG: + if (!m) + return; + tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq); + m->window = ADV_IDLE; + tipc_group_open(m, usr_wakeup); + return; + case GRP_REMIT_MSG: + if (!m || m->state != MBR_RECLAIMING) + return; + + remitted = msg_grp_remitted(hdr); + + /* Messages preceding the REMIT still in receive queue */ + if (m->advertised > remitted) { + m->state = MBR_REMITTED; + in_flight = m->advertised - remitted; + m->advertised = ADV_IDLE + in_flight; + return; + } + /* This should never happen */ + if (m->advertised < remitted) + pr_warn_ratelimited("Unexpected REMIT msg\n"); + + /* All messages preceding the REMIT have been read */ + m->state = MBR_JOINED; + grp->active_cnt--; + m->advertised = ADV_IDLE; + + /* Set oldest pending member to active and advertise */ + if (list_empty(&grp->pending)) + return; + pm = list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + if (pm->advertised <= (ADV_ACTIVE * 3 / 4)) + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + return; + default: + pr_warn("Received unknown GROUP_PROTO message\n"); + } +} + +/* tipc_group_member_evt() - receive and handle a member up/down event + */ +void tipc_group_member_evt(struct tipc_group *grp, + bool *usr_wakeup, + int *sk_rcvbuf, + struct tipc_msg *hdr, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct tipc_event *evt = (void *)msg_data(hdr); + u32 instance = evt->found_lower; + u32 node = evt->port.node; + u32 port = evt->port.ref; + int event = evt->event; + struct tipc_member *m; + struct net *net; + u32 self; + + if (!grp) + return; + + net = grp->net; + self = tipc_own_addr(net); + if (!grp->loopback && node == self && port == grp->portid) + return; + + m = tipc_group_find_member(grp, node, port); + + switch (event) { + case TIPC_PUBLISHED: + /* Send and wait for arrival of JOIN message if necessary */ + if (!m) { + m = tipc_group_create_member(grp, node, port, instance, + MBR_PUBLISHED); + if (!m) + break; + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + break; + } + + if (m->state != MBR_JOINING) + break; + + /* Member can be taken into service */ + m->instance = instance; + m->state = MBR_JOINED; + tipc_group_open(m, usr_wakeup); + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); + break; + case TIPC_WITHDRAWN: + if (!m) + break; + + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + list_del_init(&m->list); + tipc_group_open(m, usr_wakeup); + + /* Only send event if no LEAVE message can be expected */ + if (!tipc_node_is_up(net, node)) + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_rcv_nxt, inputq); + break; + default: + break; + } + *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); +} diff --git a/net/tipc/group.h b/net/tipc/group.h new file mode 100644 index 000000000000..5996af6e9f1d --- /dev/null +++ b/net/tipc/group.h @@ -0,0 +1,75 @@ +/* + * net/tipc/group.h: Include file for TIPC group unicast/multicast functions + * + * Copyright (c) 2017, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_GROUP_H +#define _TIPC_GROUP_H + +#include "core.h" + +struct tipc_group; +struct tipc_member; +struct tipc_msg; + +struct tipc_group *tipc_group_create(struct net *net, u32 portid, + struct tipc_group_req *mreq, + bool *group_is_open); +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf); +void tipc_group_delete(struct net *net, struct tipc_group *grp); +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance); +struct tipc_nlist *tipc_group_dests(struct tipc_group *grp); +void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, + int *scope); +u32 tipc_group_exclude(struct tipc_group *grp); +void tipc_group_filter_msg(struct tipc_group *grp, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup, + int *sk_rcvbuf, struct tipc_msg *hdr, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup, + struct tipc_msg *hdr, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack); +bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, + int len, struct tipc_member **m); +bool tipc_group_bc_cong(struct tipc_group *grp, int len); +void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, + u32 port, struct sk_buff_head *xmitq); +u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); +void tipc_group_update_member(struct tipc_member *m, int len); +#endif diff --git a/net/tipc/link.c b/net/tipc/link.c index ac0144f532aa..2d6b2aed30e0 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -239,7 +239,8 @@ static int link_is_up(struct tipc_link *l) static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, - u16 rcvgap, int tolerance, int priority, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, struct sk_buff_head *xmitq); static void link_print(struct tipc_link *l, const char *str); static int tipc_link_build_nack_msg(struct tipc_link *l, @@ -482,7 +483,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id, /** * tipc_link_bc_create - create new link to be used for broadcast * @n: pointer to associated node - * @mtu: mtu to be used + * @mtu: mtu to be used initially if no peers * @window: send window to be used * @inputq: queue to put messages ready for delivery * @namedq: queue to put binding table update messages ready for delivery @@ -773,7 +774,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) } if (state || probe || setup) - tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq); return rc; } @@ -1039,6 +1040,7 @@ int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker, static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { + struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq; struct tipc_msg *hdr = buf_msg(skb); switch (msg_user(hdr)) { @@ -1046,13 +1048,16 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: - if (unlikely(msg_type(hdr) == TIPC_MCAST_MSG)) { - skb_queue_tail(l->bc_rcvlink->inputq, skb); + if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) { + skb_queue_tail(mc_inputq, skb); return true; } case CONN_MANAGER: skb_queue_tail(inputq, skb); return true; + case GROUP_PROTOCOL: + skb_queue_tail(mc_inputq, skb); + return true; case NAME_DISTRIBUTOR: l->bc_rcvlink->state = LINK_ESTABLISHED; skb_queue_tail(l->namedq, skb); @@ -1170,7 +1175,7 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) /* Unicast ACK */ l->rcv_unacked = 0; l->stats.sent_acks++; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } @@ -1184,7 +1189,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) if (l->state == LINK_ESTABLISHING) mtyp = ACTIVATE_MSG; - tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq); /* Inform peer that this endpoint is going down if applicable */ skb = skb_peek_tail(xmitq); @@ -1211,7 +1216,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, } if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } @@ -1285,7 +1290,8 @@ drop: } static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, - u16 rcvgap, int tolerance, int priority, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, struct sk_buff_head *xmitq) { struct tipc_link *bcl = l->bc_rcvlink; @@ -1333,6 +1339,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_seq_gap(hdr, rcvgap); msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); msg_set_probe(hdr, probe); + msg_set_is_keepalive(hdr, probe || probe_reply); tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); msg_set_size(hdr, INT_H_SIZE + dlen); skb_trim(skb, INT_H_SIZE + dlen); @@ -1438,6 +1445,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 rcv_nxt = l->rcv_nxt; u16 dlen = msg_data_sz(hdr); int mtyp = msg_type(hdr); + bool reply = msg_probe(hdr); void *data; char *if_name; int rc = 0; @@ -1524,9 +1532,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* Send NACK if peer has sent pkts we haven't received yet */ if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) rcvgap = peers_snd_nxt - l->rcv_nxt; - if (rcvgap || (msg_probe(hdr))) - tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, - 0, 0, xmitq); + if (rcvgap || reply) + tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, + rcvgap, 0, 0, xmitq); tipc_link_release_pkts(l, ack); /* If NACK, retransmit will now start at right position */ @@ -2118,14 +2126,14 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, struct sk_buff_head *xmitq) { l->tolerance = tol; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } void tipc_link_set_prio(struct tipc_link *l, u32 prio, struct sk_buff_head *xmitq) { l->priority = prio; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq); } void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 9e109bb1a207..32dc33a94bc7 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -530,8 +530,11 @@ void tipc_mon_prep(struct net *net, void *data, int *dlen, u16 gen = mon->dom_gen; u16 len; - if (!tipc_mon_is_active(net, mon)) + /* Send invalid record if not active */ + if (!tipc_mon_is_active(net, mon)) { + dom->len = 0; return; + } /* Send only a dummy record with ack if peer has acked our last sent */ if (likely(state->acked_gen == gen)) { @@ -559,6 +562,12 @@ void tipc_mon_get_state(struct net *net, u32 addr, struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *peer; + if (!tipc_mon_is_active(net, mon)) { + state->probing = false; + state->monitoring = true; + return; + } + /* Used cached state if table has not changed */ if (!state->probing && (state->list_gen == mon->list_gen) && @@ -578,9 +587,9 @@ void tipc_mon_get_state(struct net *net, u32 addr, read_unlock_bh(&mon->lock); } -static void mon_timeout(unsigned long m) +static void mon_timeout(struct timer_list *t) { - struct tipc_monitor *mon = (void *)m; + struct tipc_monitor *mon = from_timer(mon, t, timer); struct tipc_peer *self; int best_member_cnt = dom_size(mon->peer_cnt) - 1; @@ -623,7 +632,7 @@ int tipc_mon_create(struct net *net, int bearer_id) self->is_up = true; self->is_head = true; INIT_LIST_HEAD(&self->list); - setup_timer(&mon->timer, mon_timeout, (unsigned long)mon); + timer_setup(&mon->timer, mon_timeout, 0); mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); mod_timer(&mon->timer, jiffies + mon->timer_intv); return 0; @@ -633,9 +642,13 @@ void tipc_mon_delete(struct net *net, int bearer_id) { struct tipc_net *tn = tipc_net(net); struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *self; struct tipc_peer *peer, *tmp; + if (!mon) + return; + + self = get_self(net, bearer_id); write_lock_bh(&mon->lock); tn->monitors[bearer_id] = NULL; list_for_each_entry_safe(peer, tmp, &self->list, list) { diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 17146c16ee2d..4e1c6f6450bb 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -174,7 +174,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == LAST_FRAGMENT) { TIPC_SKB_CB(head)->validated = false; - if (unlikely(!tipc_msg_validate(head))) + if (unlikely(!tipc_msg_validate(&head))) goto err; *buf = head; TIPC_SKB_CB(head)->tail = NULL; @@ -201,11 +201,21 @@ err: * TIPC will ignore the excess, under the assumption that it is optional info * introduced by a later release of the protocol. */ -bool tipc_msg_validate(struct sk_buff *skb) +bool tipc_msg_validate(struct sk_buff **_skb) { - struct tipc_msg *msg; + struct sk_buff *skb = *_skb; + struct tipc_msg *hdr; int msz, hsz; + /* Ensure that flow control ratio condition is satisfied */ + if (unlikely(skb->truesize / buf_roundup_len(skb) >= 4)) { + skb = skb_copy_expand(skb, BUF_HEADROOM, 0, GFP_ATOMIC); + if (!skb) + return false; + kfree_skb(*_skb); + *_skb = skb; + } + if (unlikely(TIPC_SKB_CB(skb)->validated)) return true; if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE))) @@ -217,11 +227,11 @@ bool tipc_msg_validate(struct sk_buff *skb) if (unlikely(!pskb_may_pull(skb, hsz))) return false; - msg = buf_msg(skb); - if (unlikely(msg_version(msg) != TIPC_VERSION)) + hdr = buf_msg(skb); + if (unlikely(msg_version(hdr) != TIPC_VERSION)) return false; - msz = msg_size(msg); + msz = msg_size(hdr); if (unlikely(msz < hsz)) return false; if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE)) @@ -241,20 +251,23 @@ bool tipc_msg_validate(struct sk_buff *skb) * @pktmax: Max packet size that can be used * @list: Buffer or chain of buffers to be returned to caller * + * Note that the recursive call we are making here is safe, since it can + * logically go only one further level down. + * * Returns message data size or errno: -ENOMEM, -EFAULT */ -int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, - int offset, int dsz, int pktmax, struct sk_buff_head *list) +int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, + int dsz, int pktmax, struct sk_buff_head *list) { int mhsz = msg_hdr_sz(mhdr); + struct tipc_msg pkthdr; int msz = mhsz + dsz; - int pktno = 1; - int pktsz; int pktrem = pktmax; - int drem = dsz; - struct tipc_msg pkthdr; struct sk_buff *skb; + int drem = dsz; + int pktno = 1; char *pktpos; + int pktsz; int rc; msg_set_size(mhdr, msz); @@ -262,8 +275,18 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, /* No fragmentation needed? */ if (likely(msz <= pktmax)) { skb = tipc_buf_acquire(msz, GFP_KERNEL); - if (unlikely(!skb)) + + /* Fall back to smaller MTU if node local message */ + if (unlikely(!skb)) { + if (pktmax != MAX_MSG_SIZE) + return -ENOMEM; + rc = tipc_msg_build(mhdr, m, offset, dsz, FB_MTU, list); + if (rc != dsz) + return rc; + if (tipc_msg_assemble(list)) + return dsz; return -ENOMEM; + } skb_orphan(skb); __skb_queue_tail(list, skb); skb_copy_to_linear_data(skb, mhdr, mhsz); @@ -411,7 +434,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) skb_pull(*iskb, offset); imsz = msg_size(buf_msg(*iskb)); skb_trim(*iskb, imsz); - if (unlikely(!tipc_msg_validate(*iskb))) + if (unlikely(!tipc_msg_validate(iskb))) goto none; *pos += align(imsz); return true; @@ -579,6 +602,30 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) return true; } +/* tipc_msg_assemble() - assemble chain of fragments into one message + */ +bool tipc_msg_assemble(struct sk_buff_head *list) +{ + struct sk_buff *skb, *tmp = NULL; + + if (skb_queue_len(list) == 1) + return true; + + while ((skb = __skb_dequeue(list))) { + skb->next = NULL; + if (tipc_buf_append(&tmp, &skb)) { + __skb_queue_tail(list, skb); + return true; + } + if (!tmp) + break; + } + __skb_queue_purge(list); + __skb_queue_head_init(list); + pr_warn("Failed do assemble buffer\n"); + return false; +} + /* tipc_msg_reassemble() - clone a buffer chain of fragments and * reassemble the clones into one message */ @@ -666,3 +713,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, } kfree_skb(skb); } + +void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + if (tipc_msg_reverse(tipc_own_addr(net), &skb, err)) + __skb_queue_tail(xmitq, skb); +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index c843fd2bc48d..b4ba1b4f9ae7 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1,7 +1,7 @@ /* * net/tipc/msg.h: Include file for TIPC message header routines * - * Copyright (c) 2000-2007, 2014-2015 Ericsson AB + * Copyright (c) 2000-2007, 2014-2017 Ericsson AB * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. * @@ -61,10 +61,14 @@ struct plist; /* * Payload message types */ -#define TIPC_CONN_MSG 0 -#define TIPC_MCAST_MSG 1 -#define TIPC_NAMED_MSG 2 -#define TIPC_DIRECT_MSG 3 +#define TIPC_CONN_MSG 0 +#define TIPC_MCAST_MSG 1 +#define TIPC_NAMED_MSG 2 +#define TIPC_DIRECT_MSG 3 +#define TIPC_GRP_MEMBER_EVT 4 +#define TIPC_GRP_BCAST_MSG 5 +#define TIPC_GRP_MCAST_MSG 6 +#define TIPC_GRP_UCAST_MSG 7 /* * Internal message users @@ -73,11 +77,13 @@ struct plist; #define MSG_BUNDLER 6 #define LINK_PROTOCOL 7 #define CONN_MANAGER 8 +#define GROUP_PROTOCOL 9 #define TUNNEL_PROTOCOL 10 #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 #define SOCK_WAKEUP 14 /* pseudo user */ +#define TOP_SRV 15 /* pseudo user */ /* * Message header sizes @@ -86,16 +92,18 @@ struct plist; #define BASIC_H_SIZE 32 /* Basic payload message */ #define NAMED_H_SIZE 40 /* Named payload message */ #define MCAST_H_SIZE 44 /* Multicast payload message */ +#define GROUP_H_SIZE 44 /* Group payload message */ #define INT_H_SIZE 40 /* Internal messages */ #define MIN_H_SIZE 24 /* Smallest legal TIPC header size */ #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ #define MAX_MSG_SIZE (MAX_H_SIZE + TIPC_MAX_USER_MSG_SIZE) - +#define FB_MTU 3744 #define TIPC_MEDIA_INFO_OFFSET 5 struct tipc_skb_cb { u32 bytes_read; + u32 orig_member; struct sk_buff *tail; bool validated; u16 chain_imp; @@ -188,6 +196,11 @@ static inline u32 msg_size(struct tipc_msg *m) return msg_bits(m, 0, 0, 0x1ffff); } +static inline u32 msg_blocks(struct tipc_msg *m) +{ + return (msg_size(m) / 1024) + 1; +} + static inline u32 msg_data_sz(struct tipc_msg *m) { return msg_size(m) - msg_hdr_sz(m); @@ -213,6 +226,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d) msg_set_bits(m, 0, 19, 1, d); } +static inline int msg_is_keepalive(struct tipc_msg *m) +{ + return msg_bits(m, 0, 19, 1); +} + +static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d) +{ + msg_set_bits(m, 0, 19, 1, d); +} + static inline int msg_src_droppable(struct tipc_msg *m) { return msg_bits(m, 0, 18, 1); @@ -251,6 +274,18 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 29, 0x7, n); } +static inline int msg_in_group(struct tipc_msg *m) +{ + int mtyp = msg_type(m); + + return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG; +} + +static inline bool msg_is_grp_evt(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_GRP_MEMBER_EVT; +} + static inline u32 msg_named(struct tipc_msg *m) { return msg_type(m) == TIPC_NAMED_MSG; @@ -258,7 +293,10 @@ static inline u32 msg_named(struct tipc_msg *m) static inline u32 msg_mcast(struct tipc_msg *m) { - return msg_type(m) == TIPC_MCAST_MSG; + int mtyp = msg_type(m); + + return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) || + (mtyp == TIPC_GRP_MCAST_MSG)); } static inline u32 msg_connected(struct tipc_msg *m) @@ -514,6 +552,16 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) #define DSC_RESP_MSG 1 /* + * Group protocol message types + */ +#define GRP_JOIN_MSG 0 +#define GRP_LEAVE_MSG 1 +#define GRP_ADV_MSG 2 +#define GRP_ACK_MSG 3 +#define GRP_RECLAIM_MSG 4 +#define GRP_REMIT_MSG 5 + +/* * Word 1 */ static inline u32 msg_seq_gap(struct tipc_msg *m) @@ -764,12 +812,12 @@ static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 16, 0xffff, n); } -static inline u32 msg_adv_win(struct tipc_msg *m) +static inline u16 msg_adv_win(struct tipc_msg *m) { return msg_bits(m, 9, 0, 0xffff); } -static inline void msg_set_adv_win(struct tipc_msg *m, u32 n) +static inline void msg_set_adv_win(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 0, 0xffff, n); } @@ -794,6 +842,68 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } +static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u16 msg_grp_bc_acked(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u16 msg_grp_remitted(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +/* Word 10 + */ +static inline u16 msg_grp_evt(struct tipc_msg *m) +{ + return msg_bits(m, 10, 0, 0x3); +} + +static inline void msg_set_grp_evt(struct tipc_msg *m, int n) +{ + msg_set_bits(m, 10, 0, 0x3, n); +} + +static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m) +{ + return msg_bits(m, 10, 0, 0x1); +} + +static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n) +{ + msg_set_bits(m, 10, 0, 0x1, n); +} + +static inline u16 msg_grp_bc_seqno(struct tipc_msg *m) +{ + return msg_bits(m, 10, 16, 0xffff); +} + +static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 10, 16, 0xffff, n); +} + static inline bool msg_peer_link_is_up(struct tipc_msg *m) { if (likely(msg_user(m) != LINK_PROTOCOL)) @@ -816,8 +926,10 @@ static inline bool msg_is_reset(struct tipc_msg *hdr) } struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp); -bool tipc_msg_validate(struct sk_buff *skb); +bool tipc_msg_validate(struct sk_buff **_skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); +void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, + struct sk_buff_head *xmitq); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, @@ -831,6 +943,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); +bool tipc_msg_assemble(struct sk_buff_head *list); bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq); bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg, struct sk_buff_head *cpy); @@ -842,6 +955,11 @@ static inline u16 buf_seqno(struct sk_buff *skb) return msg_seqno(buf_msg(skb)); } +static inline int buf_roundup_len(struct sk_buff *skb) +{ + return (skb->len / 1024 + 1) * 1024; +} + /* tipc_skb_peek(): peek and reserve first buffer in list * @list: list to be peeked in * Returns pointer to first buffer in list, if any diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index bd0aac87b41a..ed0457cc99d6 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -43,6 +43,7 @@ #include "bcast.h" #include "addr.h" #include "node.h" +#include "group.h" #include <net/genetlink.h> #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ @@ -327,7 +328,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_subscrp_report_overlap(s, publ->lower, publ->upper, TIPC_PUBLISHED, publ->ref, - publ->node, created_subseq); + publ->node, publ->scope, + created_subseq); } return publ; } @@ -397,19 +399,21 @@ found: list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_subscrp_report_overlap(s, publ->lower, publ->upper, TIPC_WITHDRAWN, publ->ref, - publ->node, removed_subseq); + publ->node, publ->scope, + removed_subseq); } return publ; } /** - * tipc_nameseq_subscribe - attach a subscription, and issue - * the prescribed number of events if there is any sub- + * tipc_nameseq_subscribe - attach a subscription, and optionally + * issue the prescribed number of events if there is any sub- * sequence overlapping with the requested sequence */ static void tipc_nameseq_subscribe(struct name_seq *nseq, - struct tipc_subscription *s) + struct tipc_subscription *s, + bool status) { struct sub_seq *sseq = nseq->sseqs; struct tipc_name_seq ns; @@ -419,7 +423,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, tipc_subscrp_get(s); list_add(&s->nameseq_list, &nseq->subscriptions); - if (!sseq) + if (!status || !sseq) return; while (sseq != &nseq->sseqs[nseq->first_free]) { @@ -433,6 +437,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, sseq->upper, TIPC_PUBLISHED, crs->ref, crs->node, + crs->scope, must_report); must_report = 0; } @@ -596,25 +601,52 @@ not_found: return ref; } -/** - * tipc_nametbl_mc_translate - find multicast destinations - * - * Creates list of all local ports that overlap the given multicast address; - * also determines if any off-node ports overlap. - * - * Note: Publications with a scope narrower than 'limit' are ignored. - * (i.e. local node-scope publications mustn't receive messages arriving - * from another node, even if the multcast link brought it here) - * - * Returns non-zero if any off-node ports overlap - */ -int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, - u32 limit, struct list_head *dports) +bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, + struct list_head *dsts, int *dstcnt, u32 exclude, + bool all) { + u32 self = tipc_own_addr(net); + struct publication *publ; + struct name_info *info; struct name_seq *seq; struct sub_seq *sseq; + + *dstcnt = 0; + rcu_read_lock(); + seq = nametbl_find_seq(net, type); + if (unlikely(!seq)) + goto exit; + spin_lock_bh(&seq->lock); + sseq = nameseq_find_subseq(seq, instance); + if (likely(sseq)) { + info = sseq->info; + list_for_each_entry(publ, &info->zone_list, zone_list) { + if (publ->scope != scope) + continue; + if (publ->ref == exclude && publ->node == self) + continue; + tipc_dest_push(dsts, publ->node, publ->ref); + (*dstcnt)++; + if (all) + continue; + list_move_tail(&publ->zone_list, &info->zone_list); + break; + } + } + spin_unlock_bh(&seq->lock); +exit: + rcu_read_unlock(); + return !list_empty(dsts); +} + +int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports) +{ struct sub_seq *sseq_stop; struct name_info *info; + struct publication *p; + struct name_seq *seq; + struct sub_seq *sseq; int res = 0; rcu_read_lock(); @@ -626,15 +658,12 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, sseq = seq->sseqs + nameseq_locate_subseq(seq, lower); sseq_stop = seq->sseqs + seq->first_free; for (; sseq != sseq_stop; sseq++) { - struct publication *publ; - if (sseq->lower > upper) break; - info = sseq->info; - list_for_each_entry(publ, &info->node_list, node_list) { - if (publ->scope <= limit) - u32_push(dports, publ->ref); + list_for_each_entry(p, &info->node_list, node_list) { + if (p->scope == scope || (!exact && p->scope < scope)) + tipc_dest_push(dports, 0, p->ref); } if (info->cluster_list_size != info->node_list_size) @@ -651,8 +680,7 @@ exit: * - Determines if any node local ports overlap */ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, u32 domain, - struct tipc_nlist *nodes) + u32 upper, struct tipc_nlist *nodes) { struct sub_seq *sseq, *stop; struct publication *publ; @@ -667,11 +695,41 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, spin_lock_bh(&seq->lock); sseq = seq->sseqs + nameseq_locate_subseq(seq, lower); stop = seq->sseqs + seq->first_free; - for (; sseq->lower <= upper && sseq != stop; sseq++) { + for (; sseq != stop && sseq->lower <= upper; sseq++) { info = sseq->info; list_for_each_entry(publ, &info->zone_list, zone_list) { - if (tipc_in_scope(domain, publ->node)) - tipc_nlist_add(nodes, publ->node); + tipc_nlist_add(nodes, publ->node); + } + } + spin_unlock_bh(&seq->lock); +exit: + rcu_read_unlock(); +} + +/* tipc_nametbl_build_group - build list of communication group members + */ +void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, + u32 type, u32 scope) +{ + struct sub_seq *sseq, *stop; + struct name_info *info; + struct publication *p; + struct name_seq *seq; + + rcu_read_lock(); + seq = nametbl_find_seq(net, type); + if (!seq) + goto exit; + + spin_lock_bh(&seq->lock); + sseq = seq->sseqs; + stop = seq->sseqs + seq->first_free; + for (; sseq != stop; sseq++) { + info = sseq->info; + list_for_each_entry(p, &info->zone_list, zone_list) { + if (p->scope != scope) + continue; + tipc_group_add_member(grp, p->node, p->ref, p->lower); } } spin_unlock_bh(&seq->lock); @@ -750,7 +808,7 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, /** * tipc_nametbl_subscribe - add a subscription object to the name table */ -void tipc_nametbl_subscribe(struct tipc_subscription *s) +void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status) { struct tipc_net *tn = net_generic(s->net, tipc_net_id); u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); @@ -764,7 +822,7 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) seq = tipc_nameseq_create(type, &tn->nametbl->seq_hlist[index]); if (seq) { spin_lock_bh(&seq->lock); - tipc_nameseq_subscribe(seq, s); + tipc_nameseq_subscribe(seq, s, status); spin_unlock_bh(&seq->lock); } else { tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); @@ -1057,78 +1115,79 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -bool u32_find(struct list_head *l, u32 value) +struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port) { - struct u32_item *item; + u64 value = (u64)node << 32 | port; + struct tipc_dest *dst; - list_for_each_entry(item, l, list) { - if (item->value == value) - return true; + list_for_each_entry(dst, l, list) { + if (dst->value != value) + continue; + return dst; } - return false; + return NULL; } -bool u32_push(struct list_head *l, u32 value) +bool tipc_dest_push(struct list_head *l, u32 node, u32 port) { - struct u32_item *item; + u64 value = (u64)node << 32 | port; + struct tipc_dest *dst; - list_for_each_entry(item, l, list) { - if (item->value == value) - return false; - } - item = kmalloc(sizeof(*item), GFP_ATOMIC); - if (unlikely(!item)) + if (tipc_dest_find(l, node, port)) return false; - item->value = value; - list_add(&item->list, l); + dst = kmalloc(sizeof(*dst), GFP_ATOMIC); + if (unlikely(!dst)) + return false; + dst->value = value; + list_add(&dst->list, l); return true; } -u32 u32_pop(struct list_head *l) +bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port) { - struct u32_item *item; - u32 value = 0; + struct tipc_dest *dst; if (list_empty(l)) - return 0; - item = list_first_entry(l, typeof(*item), list); - value = item->value; - list_del(&item->list); - kfree(item); - return value; + return false; + dst = list_first_entry(l, typeof(*dst), list); + if (port) + *port = dst->port; + if (node) + *node = dst->node; + list_del(&dst->list); + kfree(dst); + return true; } -bool u32_del(struct list_head *l, u32 value) +bool tipc_dest_del(struct list_head *l, u32 node, u32 port) { - struct u32_item *item, *tmp; + struct tipc_dest *dst; - list_for_each_entry_safe(item, tmp, l, list) { - if (item->value != value) - continue; - list_del(&item->list); - kfree(item); - return true; - } - return false; + dst = tipc_dest_find(l, node, port); + if (!dst) + return false; + list_del(&dst->list); + kfree(dst); + return true; } -void u32_list_purge(struct list_head *l) +void tipc_dest_list_purge(struct list_head *l) { - struct u32_item *item, *tmp; + struct tipc_dest *dst, *tmp; - list_for_each_entry_safe(item, tmp, l, list) { - list_del(&item->list); - kfree(item); + list_for_each_entry_safe(dst, tmp, l, list) { + list_del(&dst->list); + kfree(dst); } } -int u32_list_len(struct list_head *l) +int tipc_dest_list_len(struct list_head *l) { - struct u32_item *item; + struct tipc_dest *dst; int i = 0; - list_for_each_entry(item, l, list) { + list_for_each_entry(dst, l, list) { i++; } return i; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 6ebdeb1d84a5..f56e7cb3d436 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -40,6 +40,7 @@ struct tipc_subscription; struct tipc_plist; struct tipc_nlist; +struct tipc_group; /* * TIPC name types reserved for internal TIPC use (both current and planned) @@ -99,11 +100,15 @@ struct name_table { int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); -int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, - u32 limit, struct list_head *dports); +int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports); +void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, + u32 type, u32 domain); void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, u32 domain, - struct tipc_nlist *nodes); + u32 upper, struct tipc_nlist *nodes); +bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, + struct list_head *dsts, int *dstcnt, u32 exclude, + bool all); struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, u32 upper, u32 scope, u32 port_ref, u32 key); @@ -115,21 +120,27 @@ struct publication *tipc_nametbl_insert_publ(struct net *net, u32 type, struct publication *tipc_nametbl_remove_publ(struct net *net, u32 type, u32 lower, u32 node, u32 ref, u32 key); -void tipc_nametbl_subscribe(struct tipc_subscription *s); +void tipc_nametbl_subscribe(struct tipc_subscription *s, bool status); void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(struct net *net); void tipc_nametbl_stop(struct net *net); -struct u32_item { +struct tipc_dest { struct list_head list; - u32 value; + union { + struct { + u32 port; + u32 node; + }; + u64 value; + }; }; -bool u32_push(struct list_head *l, u32 value); -u32 u32_pop(struct list_head *l); -bool u32_find(struct list_head *l, u32 value); -bool u32_del(struct list_head *l, u32 value); -void u32_list_purge(struct list_head *l); -int u32_list_len(struct list_head *l); +struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port); +bool tipc_dest_push(struct list_head *l, u32 node, u32 port); +bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port); +bool tipc_dest_del(struct list_head *l, u32 node, u32 port); +void tipc_dest_list_purge(struct list_head *l); +int tipc_dest_list_len(struct list_head *l); #endif diff --git a/net/tipc/net.c b/net/tipc/net.c index 719c5924b638..1a2fde0d6f61 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -200,7 +200,7 @@ out: return skb->len; } -int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) +int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -241,10 +241,19 @@ int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) if (!tipc_addr_node_valid(addr)) return -EINVAL; - rtnl_lock(); tipc_net_start(net, addr); - rtnl_unlock(); } return 0; } + +int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info) +{ + int err; + + rtnl_lock(); + err = __tipc_nl_net_set(skb, info); + rtnl_unlock(); + + return err; +} diff --git a/net/tipc/net.h b/net/tipc/net.h index c7c254902873..c0306aa2374b 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -47,5 +47,6 @@ void tipc_net_stop(struct net *net); int tipc_nl_net_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); +int __tipc_nl_net_set(struct sk_buff *skb, struct genl_info *info); #endif diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index e48f0b2c01b9..4492cda45566 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -285,10 +285,6 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, if (!trans_buf) return -ENOMEM; - err = (*cmd->transcode)(cmd, trans_buf, msg); - if (err) - goto trans_out; - attrbuf = kmalloc((tipc_genl_family.maxattr + 1) * sizeof(struct nlattr *), GFP_KERNEL); if (!attrbuf) { @@ -296,27 +292,34 @@ static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, goto trans_out; } - err = nla_parse(attrbuf, tipc_genl_family.maxattr, - (const struct nlattr *)trans_buf->data, - trans_buf->len, NULL, NULL); - if (err) - goto parse_out; - doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!doit_buf) { err = -ENOMEM; - goto parse_out; + goto attrbuf_out; } - doit_buf->sk = msg->dst_sk; - memset(&info, 0, sizeof(info)); info.attrs = attrbuf; + rtnl_lock(); + err = (*cmd->transcode)(cmd, trans_buf, msg); + if (err) + goto doit_out; + + err = nla_parse(attrbuf, tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL, NULL); + if (err) + goto doit_out; + + doit_buf->sk = msg->dst_sk; + err = (*cmd->doit)(doit_buf, &info); +doit_out: + rtnl_unlock(); kfree_skb(doit_buf); -parse_out: +attrbuf_out: kfree(attrbuf); trans_out: kfree_skb(trans_buf); @@ -722,13 +725,13 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd, media = tipc_media_find(lc->name); if (media) { - cmd->doit = &tipc_nl_media_set; + cmd->doit = &__tipc_nl_media_set; return tipc_nl_compat_media_set(skb, msg); } bearer = tipc_bearer_find(msg->net, lc->name); if (bearer) { - cmd->doit = &tipc_nl_bearer_set; + cmd->doit = &__tipc_nl_bearer_set; return tipc_nl_compat_bearer_set(skb, msg); } @@ -1089,12 +1092,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_ENABLE_BEARER: msg->req_type = TIPC_TLV_BEARER_CONFIG; - doit.doit = tipc_nl_bearer_enable; + doit.doit = __tipc_nl_bearer_enable; doit.transcode = tipc_nl_compat_bearer_enable; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_DISABLE_BEARER: msg->req_type = TIPC_TLV_BEARER_NAME; - doit.doit = tipc_nl_bearer_disable; + doit.doit = __tipc_nl_bearer_disable; doit.transcode = tipc_nl_compat_bearer_disable; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_SHOW_LINK_STATS: @@ -1148,12 +1151,12 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) return tipc_nl_compat_dumpit(&dump, msg); case TIPC_CMD_SET_NODE_ADDR: msg->req_type = TIPC_TLV_NET_ADDR; - doit.doit = tipc_nl_net_set; + doit.doit = __tipc_nl_net_set; doit.transcode = tipc_nl_compat_net_set; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_SET_NETID: msg->req_type = TIPC_TLV_UNSIGNED; - doit.doit = tipc_nl_net_set; + doit.doit = __tipc_nl_net_set; doit.transcode = tipc_nl_compat_net_set; return tipc_nl_compat_doit(&doit, msg); case TIPC_CMD_GET_NETID: diff --git a/net/tipc/node.c b/net/tipc/node.c index 198dbc7adbe1..9036d8756e73 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -153,11 +153,11 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete); static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); static void tipc_node_delete(struct tipc_node *node); -static void tipc_node_timeout(unsigned long data); +static void tipc_node_timeout(struct timer_list *t); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); static struct tipc_node *tipc_node_find(struct net *net, u32 addr); static void tipc_node_put(struct tipc_node *node); -static bool tipc_node_is_up(struct tipc_node *n); +static bool node_is_up(struct tipc_node *n); struct tipc_sock_conn { u32 port; @@ -361,7 +361,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) goto exit; } tipc_node_get(n); - setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n); + timer_setup(&n->timer, tipc_node_timeout, 0); n->keepalive_intv = U32_MAX; hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { @@ -500,9 +500,9 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) /* tipc_node_timeout - handle expiration of node timer */ -static void tipc_node_timeout(unsigned long data) +static void tipc_node_timeout(struct timer_list *t) { - struct tipc_node *n = (struct tipc_node *)data; + struct tipc_node *n = from_timer(n, t, timer); struct tipc_link_entry *le; struct sk_buff_head xmitq; int bearer_id; @@ -657,7 +657,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, *slot1 = i; } - if (!tipc_node_is_up(n)) { + if (!node_is_up(n)) { if (tipc_link_peer_is_down(l)) tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT); @@ -717,11 +717,27 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_sk_rcv(n->net, &le->inputq); } -static bool tipc_node_is_up(struct tipc_node *n) +static bool node_is_up(struct tipc_node *n) { return n->active_links[0] != INVALID_BEARER_ID; } +bool tipc_node_is_up(struct net *net, u32 addr) +{ + struct tipc_node *n; + bool retval = false; + + if (in_own_node(net, addr)) + return true; + + n = tipc_node_find(net, addr); + if (!n) + return false; + retval = node_is_up(n); + tipc_node_put(n); + return retval; +} + void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_bearer *b, u16 capabilities, u32 signature, @@ -1149,7 +1165,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr)) goto attr_msg_full; - if (tipc_node_is_up(node)) + if (node_is_up(node)) if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP)) goto attr_msg_full; @@ -1238,6 +1254,22 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, return 0; } +/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations + * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected + */ +int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq) +{ + struct sk_buff *skb; + u32 selector, dnode; + + while ((skb = __skb_dequeue(xmitq))) { + selector = msg_origport(buf_msg(skb)); + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(net, skb, dnode, selector); + } + return 0; +} + void tipc_node_broadcast(struct net *net, struct sk_buff *skb) { struct sk_buff *txskb; @@ -1249,7 +1281,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb) dst = n->addr; if (in_own_node(net, dst)) continue; - if (!tipc_node_is_up(n)) + if (!node_is_up(n)) continue; txskb = pskb_copy(skb, GFP_ATOMIC); if (!txskb) @@ -1507,7 +1539,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) __skb_queue_head_init(&xmitq); /* Ensure message is well-formed before touching the header */ - if (unlikely(!tipc_msg_validate(skb))) + if (unlikely(!tipc_msg_validate(&skb))) goto discard; hdr = buf_msg(skb); usr = msg_user(hdr); @@ -1848,36 +1880,38 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info) if (strcmp(name, tipc_bclink_name) == 0) { err = tipc_nl_add_bc_link(net, &msg); - if (err) { - nlmsg_free(msg.skb); - return err; - } + if (err) + goto err_free; } else { int bearer_id; struct tipc_node *node; struct tipc_link *link; node = tipc_node_find_by_name(net, name, &bearer_id); - if (!node) - return -EINVAL; + if (!node) { + err = -EINVAL; + goto err_free; + } tipc_node_read_lock(node); link = node->links[bearer_id].link; if (!link) { tipc_node_read_unlock(node); - nlmsg_free(msg.skb); - return -EINVAL; + err = -EINVAL; + goto err_free; } err = __tipc_nl_add_link(net, &msg, link, 0); tipc_node_read_unlock(node); - if (err) { - nlmsg_free(msg.skb); - return err; - } + if (err) + goto err_free; } return genlmsg_reply(msg.skb, info); + +err_free: + nlmsg_free(msg.skb); + return err; } int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info) diff --git a/net/tipc/node.h b/net/tipc/node.h index 898c22916984..acd58d23a70e 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -48,7 +48,8 @@ enum { TIPC_BCAST_SYNCH = (1 << 1), TIPC_BCAST_STATE_NACK = (1 << 2), TIPC_BLOCK_FLOWCTL = (1 << 3), - TIPC_BCAST_RCAST = (1 << 4) + TIPC_BCAST_RCAST = (1 << 4), + TIPC_MCAST_GROUPS = (1 << 5) }; #define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \ @@ -68,6 +69,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector); +int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list); int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); @@ -76,6 +78,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); +bool tipc_node_is_up(struct net *net, u32 addr); u16 tipc_node_get_capabilities(struct net *net, u32 addr); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/tipc/server.c b/net/tipc/server.c index 3cd6402e812c..df0c563c90cd 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -36,6 +36,8 @@ #include "server.h" #include "core.h" #include "socket.h" +#include "addr.h" +#include "msg.h" #include <net/sock.h> #include <linux/module.h> @@ -105,13 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref) kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); sock_release(sock); con->sock = NULL; - - spin_lock_bh(&s->idr_lock); - idr_remove(&s->conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(&s->idr_lock); } - + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); tipc_clean_outqueues(con); kfree(con); } @@ -132,10 +132,11 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) spin_lock_bh(&s->idr_lock); con = idr_find(&s->conn_idr, conid); - if (con && test_bit(CF_CONNECTED, &con->flags)) - conn_get(con); - else - con = NULL; + if (con) { + if (!test_bit(CF_CONNECTED, &con->flags) || + !kref_get_unless_zero(&con->kref)) + con = NULL; + } spin_unlock_bh(&s->idr_lock); return con; } @@ -183,34 +184,28 @@ static void tipc_register_callbacks(struct socket *sock, struct tipc_conn *con) write_unlock_bh(&sk->sk_callback_lock); } -static void tipc_unregister_callbacks(struct tipc_conn *con) -{ - struct sock *sk = con->sock->sk; - - write_lock_bh(&sk->sk_callback_lock); - sk->sk_user_data = NULL; - write_unlock_bh(&sk->sk_callback_lock); -} - static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; + struct sock *sk = con->sock->sk; + bool disconnect = false; - if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - tipc_unregister_callbacks(con); - + write_lock_bh(&sk->sk_callback_lock); + disconnect = test_and_clear_bit(CF_CONNECTED, &con->flags); + if (disconnect) { + sk->sk_user_data = NULL; if (con->conid) s->tipc_conn_release(con->conid, con->usr_data); + } + write_unlock_bh(&sk->sk_callback_lock); - /* We shouldn't flush pending works as we may be in the - * thread. In fact the races with pending rx/tx work structs - * are harmless for us here as we have already deleted this - * connection from server connection list. - */ - kernel_sock_shutdown(con->sock, SHUT_RDWR); + /* Handle concurrent calls from sending and receiving threads */ + if (!disconnect) + return; - conn_put(con); - } + /* Don't flush pending works, -just let them expire */ + kernel_sock_shutdown(con->sock, SHUT_RDWR); + conn_put(con); } static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) @@ -247,9 +242,10 @@ static struct tipc_conn *tipc_alloc_conn(struct tipc_server *s) static int tipc_receive_from_sock(struct tipc_conn *con) { - struct msghdr msg = {}; struct tipc_server *s = con->server; + struct sock *sk = con->sock->sk; struct sockaddr_tipc addr; + struct msghdr msg = {}; struct kvec iov; void *buf; int ret; @@ -263,19 +259,22 @@ static int tipc_receive_from_sock(struct tipc_conn *con) iov.iov_base = buf; iov.iov_len = s->max_rcvbuf_size; msg.msg_name = &addr; - ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, - MSG_DONTWAIT); + iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, iov.iov_len); + ret = sock_recvmsg(con->sock, &msg, MSG_DONTWAIT); if (ret <= 0) { kmem_cache_free(s->rcvbuf_cache, buf); goto out_close; } - s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, &addr, - con->usr_data, buf, ret); - + read_lock_bh(&sk->sk_callback_lock); + if (test_bit(CF_CONNECTED, &con->flags)) + ret = s->tipc_conn_recvmsg(sock_net(con->sock->sk), con->conid, + &addr, con->usr_data, buf, ret); + read_unlock_bh(&sk->sk_callback_lock); kmem_cache_free(s->rcvbuf_cache, buf); - - return 0; + if (ret < 0) + tipc_conn_terminate(s, con->conid); + return ret; out_close: if (ret != -EWOULDBLOCK) @@ -313,6 +312,7 @@ static int tipc_accept_from_sock(struct tipc_conn *con) newcon->usr_data = s->tipc_conn_new(newcon->conid); if (!newcon->usr_data) { sock_release(newsock); + conn_put(newcon); return -ENOMEM; } @@ -487,38 +487,110 @@ void tipc_conn_terminate(struct tipc_server *s, int conid) } } +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid) +{ + struct tipc_subscriber *scbr; + struct tipc_subscr sub; + struct tipc_server *s; + struct tipc_conn *con; + + sub.seq.type = type; + sub.seq.lower = lower; + sub.seq.upper = upper; + sub.timeout = TIPC_WAIT_FOREVER; + sub.filter = filter; + *(u32 *)&sub.usr_handle = port; + + con = tipc_alloc_conn(tipc_topsrv(net)); + if (IS_ERR(con)) + return false; + + *conid = con->conid; + s = con->server; + scbr = s->tipc_conn_new(*conid); + if (!scbr) { + conn_put(con); + return false; + } + + con->usr_data = scbr; + con->sock = NULL; + s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub)); + return true; +} + +void tipc_topsrv_kern_unsubscr(struct net *net, int conid) +{ + struct tipc_conn *con; + struct tipc_server *srv; + + con = tipc_conn_lookup(tipc_topsrv(net), conid); + if (!con) + return; + + test_and_clear_bit(CF_CONNECTED, &con->flags); + srv = con->server; + if (con->conid) + srv->tipc_conn_release(con->conid, con->usr_data); + conn_put(con); + conn_put(con); +} + +static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt) +{ + u32 port = *(u32 *)&evt->s.usr_handle; + u32 self = tipc_own_addr(net); + struct sk_buff_head evtq; + struct sk_buff *skb; + + skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt), + self, self, port, port, 0); + if (!skb) + return; + msg_set_dest_droppable(buf_msg(skb), true); + memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); + skb_queue_head_init(&evtq); + __skb_queue_tail(&evtq, skb); + tipc_sk_rcv(net, &evtq); +} + static void tipc_send_to_sock(struct tipc_conn *con) { - int count = 0; struct tipc_server *s = con->server; struct outqueue_entry *e; + struct tipc_event *evt; struct msghdr msg; + int count = 0; int ret; spin_lock_bh(&con->outqueue_lock); while (test_bit(CF_CONNECTED, &con->flags)) { - e = list_entry(con->outqueue.next, struct outqueue_entry, - list); + e = list_entry(con->outqueue.next, struct outqueue_entry, list); if ((struct list_head *) e == &con->outqueue) break; - spin_unlock_bh(&con->outqueue_lock); - memset(&msg, 0, sizeof(msg)); - msg.msg_flags = MSG_DONTWAIT; + spin_unlock_bh(&con->outqueue_lock); - if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { - msg.msg_name = &e->dest; - msg.msg_namelen = sizeof(struct sockaddr_tipc); - } - ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, - e->iov.iov_len); - if (ret == -EWOULDBLOCK || ret == 0) { - cond_resched(); - goto out; - } else if (ret < 0) { - goto send_err; + if (con->sock) { + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { + msg.msg_name = &e->dest; + msg.msg_namelen = sizeof(struct sockaddr_tipc); + } + ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, + e->iov.iov_len); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + goto out; + } else if (ret < 0) { + goto send_err; + } + } else { + evt = e->iov.iov_base; + tipc_send_kern_top_evt(s->net, evt); } - /* Don't starve users filling buffers */ if (++count >= MAX_SEND_MSG_COUNT) { cond_resched(); diff --git a/net/tipc/server.h b/net/tipc/server.h index 34f8055afa3b..64df7513cd70 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -41,6 +41,9 @@ #include <net/net_namespace.h> #define TIPC_SERVER_NAME_LEN 32 +#define TIPC_SUB_CLUSTER_SCOPE 0x20 +#define TIPC_SUB_NODE_SCOPE 0x40 +#define TIPC_SUB_NO_STATUS 0x80 /** * struct tipc_server - TIPC server structure @@ -71,9 +74,9 @@ struct tipc_server { int max_rcvbuf_size; void *(*tipc_conn_new)(int conid); void (*tipc_conn_release)(int conid, void *usr_data); - void (*tipc_conn_recvmsg)(struct net *net, int conid, - struct sockaddr_tipc *addr, void *usr_data, - void *buf, size_t len); + int (*tipc_conn_recvmsg)(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len); struct sockaddr_tipc *saddr; char name[TIPC_SERVER_NAME_LEN]; int imp; @@ -83,13 +86,16 @@ struct tipc_server { int tipc_conn_sendmsg(struct tipc_server *s, int conid, struct sockaddr_tipc *addr, void *data, size_t len); +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid); +void tipc_topsrv_kern_unsubscr(struct net *net, int conid); + /** * tipc_conn_terminate - terminate connection with server * * Note: Must call it in process context since it might sleep */ void tipc_conn_terminate(struct tipc_server *s, int conid); - int tipc_server_start(struct tipc_server *s); void tipc_server_stop(struct tipc_server *s); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d50edd6e0019..7dfa9fc99ec3 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,7 +1,7 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012-2016, Ericsson AB + * Copyright (c) 2001-2007, 2012-2017, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * @@ -45,9 +45,10 @@ #include "socket.h" #include "bcast.h" #include "netlink.h" +#include "group.h" #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ +#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 #define TIPC_MAX_PORT 0xffffffff #define TIPC_MIN_PORT 1 @@ -61,6 +62,11 @@ enum { TIPC_CONNECTING = TCP_SYN_SENT, }; +struct sockaddr_pair { + struct sockaddr_tipc sock; + struct sockaddr_tipc member; +}; + /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API @@ -78,7 +84,7 @@ enum { * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @cong_link_cnt: number of congested links - * @sent_unacked: # messages sent by socket, and not yet acked by peer + * @snt_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer * @peer: 'connected' peer for dgram/rdm * @node: hash table node @@ -109,20 +115,23 @@ struct tipc_sock { struct rhash_head node; struct tipc_mc_method mc_method; struct rcu_head rcu; + struct tipc_group *group; + bool group_is_open; }; -static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); +static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static void tipc_sock_destruct(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern); -static void tipc_sk_timeout(unsigned long data); +static void tipc_sk_timeout(struct timer_list *t); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); +static int tipc_sk_leave(struct tipc_sock *tsk); static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); static void tipc_sk_remove(struct tipc_sock *tsk); @@ -193,6 +202,11 @@ static bool tsk_conn_cong(struct tipc_sock *tsk) return tsk->snt_unacked > tsk->snd_win; } +static u16 tsk_blocks(int len) +{ + return ((len / FLOWCTL_BLK_SZ) + 1); +} + /* tsk_blocks(): translate a buffer size in bytes to number of * advertisable blocks, taking into account the ratio truesize(len)/len * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ @@ -451,14 +465,15 @@ static int tipc_sk_create(struct net *net, struct socket *sock, NAMED_H_SIZE, 0); msg_set_origport(msg, tsk->portid); - setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); + timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); sk->sk_shutdown = 0; - sk->sk_backlog_rcv = tipc_backlog_rcv; + sk->sk_backlog_rcv = tipc_sk_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; sk->sk_destruct = tipc_sock_destruct; tsk->conn_timeout = CONN_TIMEOUT_DEFAULT; + tsk->group_is_open = true; atomic_set(&tsk->dupl_rcvcnt, 0); /* Start out with safe limits until we receive an advertised window */ @@ -559,13 +574,14 @@ static int tipc_release(struct socket *sock) __tipc_shutdown(sock, TIPC_ERR_NO_PORT); sk->sk_shutdown = SHUTDOWN_MASK; + tipc_sk_leave(tsk); tipc_sk_withdraw(tsk, 0, NULL); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); /* Reject any messages that accumulated in backlog queue */ release_sock(sk); - u32_list_purge(&tsk->cong_links); + tipc_dest_list_purge(&tsk->cong_links); tsk->cong_link_cnt = 0; call_rcu(&tsk->rcu, tipc_sk_callback); sock->sk = NULL; @@ -601,7 +617,10 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, res = tipc_sk_withdraw(tsk, 0, NULL); goto exit; } - + if (tsk->group) { + res = -EACCES; + goto exit; + } if (uaddr_len < sizeof(struct sockaddr_tipc)) { res = -EINVAL; goto exit; @@ -693,43 +712,44 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, * imply that the operation will succeed, merely that it should be performed * and will not block. */ -static unsigned int tipc_poll(struct file *file, struct socket *sock, +static __poll_t tipc_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - u32 mask = 0; + __poll_t revents = 0; sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP | POLLIN | POLLRDNORM; + revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= POLLHUP; + revents |= EPOLLHUP; switch (sk->sk_state) { case TIPC_ESTABLISHED: + case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) - mask |= POLLOUT; + revents |= EPOLLOUT; /* fall thru' */ case TIPC_LISTEN: - case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= (POLLIN | POLLRDNORM); + revents |= EPOLLIN | EPOLLRDNORM; break; case TIPC_OPEN: - if (!tsk->cong_link_cnt) - mask |= POLLOUT; - if (tipc_sk_type_connectionless(sk) && - (!skb_queue_empty(&sk->sk_receive_queue))) - mask |= (POLLIN | POLLRDNORM); + if (tsk->group_is_open && !tsk->cong_link_cnt) + revents |= EPOLLOUT; + if (!tipc_sk_type_connectionless(sk)) + break; + if (skb_queue_empty(&sk->sk_receive_queue)) + break; + revents |= EPOLLIN | EPOLLRDNORM; break; case TIPC_DISCONNECTING: - mask = (POLLIN | POLLRDNORM | POLLHUP); + revents = EPOLLIN | EPOLLRDNORM | EPOLLHUP; break; } - - return mask; + return revents; } /** @@ -752,11 +772,13 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct net *net = sock_net(sk); int mtu = tipc_bcast_get_mtu(net); struct tipc_mc_method *method = &tsk->mc_method; - u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE); struct sk_buff_head pkts; struct tipc_nlist dsts; int rc; + if (tsk->group) + return -EACCES; + /* Block or return if any destination link is congested */ rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt); if (unlikely(rc)) @@ -765,7 +787,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, /* Lookup destination nodes */ tipc_nlist_init(&dsts, tipc_own_addr(net)); tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower, - seq->upper, domain, &dsts); + seq->upper, &dsts); if (!dsts.local && !dsts.remote) return -EHOSTUNREACH; @@ -794,6 +816,298 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, } /** + * tipc_send_group_msg - send a message to a member in the group + * @net: network namespace + * @m: message to send + * @mb: group member + * @dnode: destination node + * @dport: destination port + * @dlen: total length of message data + */ +static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, + struct msghdr *m, struct tipc_member *mb, + u32 dnode, u32 dport, int dlen) +{ + u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group); + struct tipc_mc_method *method = &tsk->mc_method; + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_msg *hdr = &tsk->phdr; + struct sk_buff_head pkts; + int mtu, rc; + + /* Complete message header */ + msg_set_type(hdr, TIPC_GRP_UCAST_MSG); + msg_set_hdr_sz(hdr, GROUP_H_SIZE); + msg_set_destport(hdr, dport); + msg_set_destnode(hdr, dnode); + msg_set_grp_bc_seqno(hdr, bc_snd_nxt); + + /* Build message as chain of buffers */ + skb_queue_head_init(&pkts); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + + /* Send message */ + rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); + if (unlikely(rc == -ELINKCONG)) { + tipc_dest_push(&tsk->cong_links, dnode, 0); + tsk->cong_link_cnt++; + } + + /* Update send window */ + tipc_group_update_member(mb, blks); + + /* A broadcast sent within next EXPIRE period must follow same path */ + method->rcast = true; + method->mandatory = true; + return dlen; +} + +/** + * tipc_send_group_unicast - send message to a member in the group + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + struct sock *sk = sock->sk; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct net *net = sock_net(sk); + struct tipc_member *mb = NULL; + u32 node, port; + int rc; + + node = dest->addr.id.node; + port = dest->addr.id.ref; + if (!port && !node) + return -EHOSTUNREACH; + + /* Block or return if destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(&tsk->cong_links, node, 0) && + !tipc_group_cong(grp, node, port, blks, &mb)); + if (unlikely(rc)) + return rc; + + if (unlikely(!mb)) + return -EHOSTUNREACH; + + rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen); + + return rc ? rc : dlen; +} + +/** + * tipc_send_group_anycast - send message to any member with given identity + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct list_head *cong_links = &tsk->cong_links; + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; + struct tipc_member *first = NULL; + struct tipc_member *mbr = NULL; + struct net *net = sock_net(sk); + u32 node, port, exclude; + struct list_head dsts; + u32 type, inst, scope; + int lookups = 0; + int dstcnt, rc; + bool cong; + + INIT_LIST_HEAD(&dsts); + + type = msg_nametype(hdr); + inst = dest->addr.name.name.instance; + scope = msg_lookup_scope(hdr); + exclude = tipc_group_exclude(grp); + + while (++lookups < 4) { + first = NULL; + + /* Look for a non-congested destination member, if any */ + while (1) { + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, + &dstcnt, exclude, false)) + return -EHOSTUNREACH; + tipc_dest_pop(&dsts, &node, &port); + cong = tipc_group_cong(grp, node, port, blks, &mbr); + if (!cong) + break; + if (mbr == first) + break; + if (!first) + first = mbr; + } + + /* Start over if destination was not in member list */ + if (unlikely(!mbr)) + continue; + + if (likely(!cong && !tipc_dest_find(cong_links, node, 0))) + break; + + /* Block or return if destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(cong_links, node, 0) && + !tipc_group_cong(grp, node, port, + blks, &mbr)); + if (unlikely(rc)) + return rc; + + /* Send, unless destination disappeared while waiting */ + if (likely(mbr)) + break; + } + + if (unlikely(lookups >= 4)) + return -EHOSTUNREACH; + + rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen); + + return rc ? rc : dlen; +} + +/** + * tipc_send_group_bcast - send message to all members in communication group + * @sk: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct tipc_nlist *dsts = tipc_group_dests(grp); + struct tipc_mc_method *method = &tsk->mc_method; + bool ack = method->mandatory && method->rcast; + int blks = tsk_blocks(MCAST_H_SIZE + dlen); + struct tipc_msg *hdr = &tsk->phdr; + int mtu = tipc_bcast_get_mtu(net); + struct sk_buff_head pkts; + int rc = -EHOSTUNREACH; + + if (!dsts->local && !dsts->remote) + return -EHOSTUNREACH; + + /* Block or return if any destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt && + !tipc_group_bc_cong(grp, blks)); + if (unlikely(rc)) + return rc; + + /* Complete message header */ + if (dest) { + msg_set_type(hdr, TIPC_GRP_MCAST_MSG); + msg_set_nameinst(hdr, dest->addr.name.name.instance); + } else { + msg_set_type(hdr, TIPC_GRP_BCAST_MSG); + msg_set_nameinst(hdr, 0); + } + msg_set_hdr_sz(hdr, GROUP_H_SIZE); + msg_set_destport(hdr, 0); + msg_set_destnode(hdr, 0); + msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp)); + + /* Avoid getting stuck with repeated forced replicasts */ + msg_set_grp_bc_ack_req(hdr, ack); + + /* Build message as chain of buffers */ + skb_queue_head_init(&pkts); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + + /* Send message */ + rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt); + if (unlikely(rc)) + return rc; + + /* Update broadcast sequence number and send windows */ + tipc_group_update_bc_members(tsk->group, blks, ack); + + /* Broadcast link is now free to choose method for next broadcast */ + method->mandatory = false; + method->expires = jiffies; + + return dlen; +} + +/** + * tipc_send_group_mcast - send message to all members with given identity + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + struct sock *sk = sock->sk; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; + struct net *net = sock_net(sk); + u32 type, inst, scope, exclude; + struct list_head dsts; + u32 dstcnt; + + INIT_LIST_HEAD(&dsts); + + type = msg_nametype(hdr); + inst = dest->addr.name.name.instance; + scope = msg_lookup_scope(hdr); + exclude = tipc_group_exclude(grp); + + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, + &dstcnt, exclude, true)) + return -EHOSTUNREACH; + + if (dstcnt == 1) { + tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref); + return tipc_send_group_unicast(sock, m, dlen, timeout); + } + + tipc_dest_list_purge(&dsts); + return tipc_send_group_bcast(sock, m, dlen, timeout); +} + +/** * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets * @arrvq: queue with arriving messages, to be cloned after destination lookup * @inputq: queue with cloned messages, delivered to socket after dest lookup @@ -803,32 +1117,64 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq) { - struct tipc_msg *msg; - struct list_head dports; - u32 portid; - u32 scope = TIPC_CLUSTER_SCOPE; - struct sk_buff_head tmpq; - uint hsz; + u32 self = tipc_own_addr(net); + u32 type, lower, upper, scope; struct sk_buff *skb, *_skb; + u32 portid, oport, onode; + struct sk_buff_head tmpq; + struct list_head dports; + struct tipc_msg *hdr; + int user, mtyp, hlen; + bool exact; __skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { - msg = buf_msg(skb); - hsz = skb_headroom(skb) + msg_hdr_sz(msg); - - if (in_own_node(net, msg_orignode(msg))) - scope = TIPC_NODE_SCOPE; - - /* Create destination port list and message clones: */ - tipc_nametbl_mc_translate(net, - msg_nametype(msg), msg_namelower(msg), - msg_nameupper(msg), scope, &dports); - portid = u32_pop(&dports); - for (; portid; portid = u32_pop(&dports)) { - _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); + hdr = buf_msg(skb); + user = msg_user(hdr); + mtyp = msg_type(hdr); + hlen = skb_headroom(skb) + msg_hdr_sz(hdr); + oport = msg_origport(hdr); + onode = msg_orignode(hdr); + type = msg_nametype(hdr); + + if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { + spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + __skb_dequeue(arrvq); + __skb_queue_tail(inputq, skb); + } + kfree_skb(skb); + spin_unlock_bh(&inputq->lock); + continue; + } + + /* Group messages require exact scope match */ + if (msg_in_group(hdr)) { + lower = 0; + upper = ~0; + scope = msg_lookup_scope(hdr); + exact = true; + } else { + /* TIPC_NODE_SCOPE means "any scope" in this context */ + if (onode == self) + scope = TIPC_NODE_SCOPE; + else + scope = TIPC_CLUSTER_SCOPE; + exact = false; + lower = msg_namelower(hdr); + upper = msg_nameupper(hdr); + } + + /* Create destination port list: */ + tipc_nametbl_mc_lookup(net, type, lower, upper, + scope, exact, &dports); + + /* Clone message per destination */ + while (tipc_dest_pop(&dports, NULL, &portid)) { + _skb = __pskb_copy(skb, hlen, GFP_ATOMIC); if (_skb) { msg_set_destport(buf_msg(_skb), portid); __skb_queue_tail(&tmpq, _skb); @@ -850,16 +1196,16 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, } /** - * tipc_sk_proto_rcv - receive a connection mng protocol message + * tipc_sk_conn_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket * @skb: pointer to message buffer. */ -static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, - struct sk_buff_head *xmitq) +static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { - struct sock *sk = &tsk->sk; - u32 onode = tsk_own_node(tsk); struct tipc_msg *hdr = buf_msg(skb); + u32 onode = tsk_own_node(tsk); + struct sock *sk = &tsk->sk; int mtyp = msg_type(hdr); bool conn_cong; @@ -931,6 +1277,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); struct list_head *clinks = &tsk->cong_links; bool syn = !tipc_sk_type_connectionless(sk); + struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq *seq; struct sk_buff_head pkts; @@ -941,18 +1288,31 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) return -EMSGSIZE; + if (likely(dest)) { + if (unlikely(m->msg_namelen < sizeof(*dest))) + return -EINVAL; + if (unlikely(dest->family != AF_TIPC)) + return -EINVAL; + } + + if (grp) { + if (!dest) + return tipc_send_group_bcast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_NAME) + return tipc_send_group_anycast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_ID) + return tipc_send_group_unicast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_MCAST) + return tipc_send_group_mcast(sock, m, dlen, timeout); + return -EINVAL; + } + if (unlikely(!dest)) { dest = &tsk->peer; if (!syn || dest->family != AF_TIPC) return -EDESTADDRREQ; } - if (unlikely(m->msg_namelen < sizeof(*dest))) - return -EINVAL; - - if (unlikely(dest->family != AF_TIPC)) - return -EINVAL; - if (unlikely(syn)) { if (sk->sk_state == TIPC_LISTEN) return -EPIPE; @@ -985,7 +1345,6 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) msg_set_destport(hdr, dport); if (unlikely(!dport && !dnode)) return -EHOSTUNREACH; - } else if (dest->addrtype == TIPC_ADDR_ID) { dnode = dest->addr.id.node; msg_set_type(hdr, TIPC_DIRECT_MSG); @@ -996,7 +1355,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) } /* Block or return if destination link is congested */ - rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode)); + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(clinks, dnode, 0)); if (unlikely(rc)) return rc; @@ -1008,7 +1368,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); if (unlikely(rc == -ELINKCONG)) { - u32_push(clinks, dnode); + tipc_dest_push(clinks, dnode, 0); tsk->cong_link_cnt++; rc = 0; } @@ -1128,7 +1488,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); tipc_set_sk_state(sk, TIPC_ESTABLISHED); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); @@ -1142,26 +1502,38 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, } /** - * set_orig_addr - capture sender's address for received message + * tipc_sk_set_orig_addr - capture sender's address for received message * @m: descriptor for message info - * @msg: received message header + * @hdr: received message header * * Note: Address is not captured if not requested by receiver. */ -static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) +static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) { - DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name); + DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name); + struct tipc_msg *hdr = buf_msg(skb); - if (addr) { - addr->family = AF_TIPC; - addr->addrtype = TIPC_ADDR_ID; - memset(&addr->addr, 0, sizeof(addr->addr)); - addr->addr.id.ref = msg_origport(msg); - addr->addr.id.node = msg_orignode(msg); - addr->addr.name.domain = 0; /* could leave uninitialized */ - addr->scope = 0; /* could leave uninitialized */ - m->msg_namelen = sizeof(struct sockaddr_tipc); - } + if (!srcaddr) + return; + + srcaddr->sock.family = AF_TIPC; + srcaddr->sock.addrtype = TIPC_ADDR_ID; + srcaddr->sock.addr.id.ref = msg_origport(hdr); + srcaddr->sock.addr.id.node = msg_orignode(hdr); + srcaddr->sock.addr.name.domain = 0; + srcaddr->sock.scope = 0; + m->msg_namelen = sizeof(struct sockaddr_tipc); + + if (!msg_in_group(hdr)) + return; + + /* Group message users may also want to know sending member's id */ + srcaddr->member.family = AF_TIPC; + srcaddr->member.addrtype = TIPC_ADDR_NAME; + srcaddr->member.addr.name.name.type = msg_nametype(hdr); + srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member; + srcaddr->member.addr.name.domain = 0; + m->msg_namelen = sizeof(*srcaddr); } /** @@ -1318,11 +1690,13 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buflen, int flags) { struct sock *sk = sock->sk; - struct tipc_sock *tsk = tipc_sk(sk); - struct sk_buff *skb; - struct tipc_msg *hdr; bool connected = !tipc_sk_type_connectionless(sk); + struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy; + struct sk_buff_head xmitq; + struct tipc_msg *hdr; + struct sk_buff *skb; + bool grp_evt; long timeout; /* Catch invalid receive requests */ @@ -1336,8 +1710,8 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, } timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + /* Step rcv queue to first msg with data or error; wait if necessary */ do { - /* Look at first msg in receive queue; wait if necessary */ rc = tipc_wait_for_rcvmsg(sock, &timeout); if (unlikely(rc)) goto exit; @@ -1346,13 +1720,14 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr); err = msg_errcode(hdr); + grp_evt = msg_is_grp_evt(hdr); if (likely(dlen || err)) break; tsk_advance_rx_queue(sk); } while (1); /* Collect msg meta data, including error code and rejected data */ - set_orig_addr(m, hdr); + tipc_sk_set_orig_addr(m, skb); rc = tipc_sk_anc_data_recv(m, hdr, tsk); if (unlikely(rc)) goto exit; @@ -1372,15 +1747,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, if (unlikely(rc)) goto exit; + /* Mark message as group event if applicable */ + if (unlikely(grp_evt)) { + if (msg_grp_evt(hdr) == TIPC_WITHDRAWN) + m->msg_flags |= MSG_EOR; + m->msg_flags |= MSG_OOB; + copy = 0; + } + /* Caption of data or error code/rejected data was successful */ if (unlikely(flags & MSG_PEEK)) goto exit; + /* Send group flow control advertisement when applicable */ + if (tsk->group && msg_in_group(hdr) && !grp_evt) { + skb_queue_head_init(&xmitq); + tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen), + msg_orignode(hdr), msg_origport(hdr), + &xmitq); + tipc_node_distr_xmit(sock_net(sk), &xmitq); + } + tsk_advance_rx_queue(sk); + if (likely(!connected)) goto exit; - /* Send connection flow control ack when applicable */ + /* Send connection flow control advertisement when applicable */ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) tipc_sk_send_ack(tsk); @@ -1446,7 +1839,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m, /* Collect msg meta data, incl. error code and rejected data */ if (!copied) { - set_orig_addr(m, hdr); + tipc_sk_set_orig_addr(m, skb); rc = tipc_sk_anc_data_recv(m, hdr, tsk); if (rc) break; @@ -1505,8 +1898,8 @@ static void tipc_write_space(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | - POLLWRNORM | POLLWRBAND); + wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | + EPOLLWRNORM | EPOLLWRBAND); rcu_read_unlock(); } @@ -1522,8 +1915,8 @@ static void tipc_data_ready(struct sock *sk) rcu_read_lock(); wq = rcu_dereference(sk->sk_wq); if (skwq_has_sleeper(wq)) - wake_up_interruptible_sync_poll(&wq->wait, POLLIN | - POLLRDNORM | POLLRDBAND); + wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | + EPOLLRDNORM | EPOLLRDBAND); rcu_read_unlock(); } @@ -1532,14 +1925,50 @@ static void tipc_sock_destruct(struct sock *sk) __skb_queue_purge(&sk->sk_receive_queue); } +static void tipc_sk_proto_rcv(struct sock *sk, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb = __skb_dequeue(inputq); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = buf_msg(skb); + struct tipc_group *grp = tsk->group; + bool wakeup = false; + + switch (msg_user(hdr)) { + case CONN_MANAGER: + tipc_sk_conn_proto_rcv(tsk, skb, xmitq); + return; + case SOCK_WAKEUP: + tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); + tsk->cong_link_cnt--; + wakeup = true; + break; + case GROUP_PROTOCOL: + tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq); + break; + case TOP_SRV: + tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf, + hdr, inputq, xmitq); + break; + default: + break; + } + + if (wakeup) + sk->sk_write_space(sk); + + kfree_skb(skb); +} + /** - * filter_connect - Handle all incoming messages for a connection-based socket + * tipc_filter_connect - Handle incoming message for a connection-based socket * @tsk: TIPC socket * @skb: pointer to message buffer. Set to NULL if buffer is consumed * * Returns true if everything ok, false otherwise */ -static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) +static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); @@ -1643,6 +2072,9 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = buf_msg(skb); + if (unlikely(msg_in_group(hdr))) + return sk->sk_rcvbuf; + if (unlikely(!msg_connected(hdr))) return sk->sk_rcvbuf << msg_importance(hdr); @@ -1653,7 +2085,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) } /** - * filter_rcv - validate incoming message + * tipc_sk_filter_rcv - validate incoming message * @sk: socket * @skb: pointer to message. * @@ -1662,99 +2094,71 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) * * Called with socket lock already taken * - * Returns true if message was added to socket receive queue, otherwise false */ -static bool filter_rcv(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *xmitq) +static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { + bool sk_conn = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = buf_msg(skb); - unsigned int limit = rcvbuf_limit(sk, skb); - int err = TIPC_OK; - int usr = msg_user(hdr); - u32 onode; + struct net *net = sock_net(sk); + struct sk_buff_head inputq; + int limit, err = TIPC_OK; - if (unlikely(msg_user(hdr) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, skb, xmitq); - return false; - } + TIPC_SKB_CB(skb)->bytes_read = 0; + __skb_queue_head_init(&inputq); + __skb_queue_tail(&inputq, skb); - if (unlikely(usr == SOCK_WAKEUP)) { - onode = msg_orignode(hdr); - kfree_skb(skb); - u32_del(&tsk->cong_links, onode); - tsk->cong_link_cnt--; - sk->sk_write_space(sk); - return false; - } + if (unlikely(!msg_isdata(hdr))) + tipc_sk_proto_rcv(sk, &inputq, xmitq); - /* Drop if illegal message type */ - if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) { - kfree_skb(skb); - return false; - } + if (unlikely(grp)) + tipc_group_filter_msg(grp, &inputq, xmitq); - /* Reject if wrong message type for current socket state */ - if (tipc_sk_type_connectionless(sk)) { - if (msg_connected(hdr)) { + /* Validate and add to receive buffer if there is space */ + while ((skb = __skb_dequeue(&inputq))) { + hdr = buf_msg(skb); + limit = rcvbuf_limit(sk, skb); + if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) || + (!sk_conn && msg_connected(hdr)) || + (!grp && msg_in_group(hdr))) err = TIPC_ERR_NO_PORT; - goto reject; - } - } else if (unlikely(!filter_connect(tsk, skb))) { - err = TIPC_ERR_NO_PORT; - goto reject; - } + else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) + err = TIPC_ERR_OVERLOAD; - /* Reject message if there isn't room to queue it */ - if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) { - err = TIPC_ERR_OVERLOAD; - goto reject; + if (unlikely(err)) { + tipc_skb_reject(net, err, skb, xmitq); + err = TIPC_OK; + continue; + } + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + sk->sk_data_ready(sk); } - - /* Enqueue message */ - TIPC_SKB_CB(skb)->bytes_read = 0; - __skb_queue_tail(&sk->sk_receive_queue, skb); - skb_set_owner_r(skb, sk); - - sk->sk_data_ready(sk); - return true; - -reject: - if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err)) - __skb_queue_tail(xmitq, skb); - return false; } /** - * tipc_backlog_rcv - handle incoming message from backlog queue + * tipc_sk_backlog_rcv - handle incoming message from backlog queue * @sk: socket * @skb: message * * Caller must hold socket lock - * - * Returns 0 */ -static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) +static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { - unsigned int truesize = skb->truesize; + unsigned int before = sk_rmem_alloc_get(sk); struct sk_buff_head xmitq; - u32 dnode, selector; + unsigned int added; __skb_queue_head_init(&xmitq); - if (likely(filter_rcv(sk, skb, &xmitq))) { - atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); - return 0; - } - - if (skb_queue_empty(&xmitq)) - return 0; + tipc_sk_filter_rcv(sk, skb, &xmitq); + added = sk_rmem_alloc_get(sk) - before; + atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt); - /* Send response/rejected message */ - skb = __skb_dequeue(&xmitq); - dnode = msg_destnode(buf_msg(skb)); - selector = msg_origport(buf_msg(skb)); - tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); + /* Send pending response/rejected messages, if any */ + tipc_node_distr_xmit(sock_net(sk), &xmitq); return 0; } @@ -1786,7 +2190,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /* Add message directly to receive queue if possible */ if (!sock_owned_by_user(sk)) { - filter_rcv(sk, skb, xmitq); + tipc_sk_filter_rcv(sk, skb, xmitq); continue; } @@ -1833,14 +2237,10 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) spin_unlock_bh(&sk->sk_lock.slock); } /* Send pending response/rejected messages, if any */ - while ((skb = __skb_dequeue(&xmitq))) { - dnode = msg_destnode(buf_msg(skb)); - tipc_node_xmit_skb(net, skb, dnode, dport); - } + tipc_node_distr_xmit(sock_net(sk), &xmitq); sock_put(sk); continue; } - /* No destination socket => dequeue skb if still there */ skb = tipc_skb_dequeue(inputq, dport); if (!skb) @@ -1903,28 +2303,32 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, int previous; int res = 0; + if (destlen != sizeof(struct sockaddr_tipc)) + return -EINVAL; + lock_sock(sk); - /* DGRAM/RDM connect(), just save the destaddr */ - if (tipc_sk_type_connectionless(sk)) { - if (dst->family == AF_UNSPEC) { - memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); - } else if (destlen != sizeof(struct sockaddr_tipc)) { - res = -EINVAL; - } else { - memcpy(&tsk->peer, dest, destlen); - } + if (tsk->group) { + res = -EINVAL; goto exit; } - /* - * Reject connection attempt using multicast address - * - * Note: send_msg() validates the rest of the address fields, - * so there's no need to do it here - */ - if (dst->addrtype == TIPC_ADDR_MCAST) { + if (dst->family == AF_UNSPEC) { + memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); + if (!tipc_sk_type_connectionless(sk)) + res = -EINVAL; + goto exit; + } else if (dst->family != AF_TIPC) { + res = -EINVAL; + } + if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME) res = -EINVAL; + if (res) + goto exit; + + /* DGRAM/RDM connect(), just save the destaddr */ + if (tipc_sk_type_connectionless(sk)) { + memcpy(&tsk->peer, dest, destlen); goto exit; } @@ -2141,46 +2545,43 @@ static int tipc_shutdown(struct socket *sock, int how) return res; } -static void tipc_sk_timeout(unsigned long data) +static void tipc_sk_timeout(struct timer_list *t) { - struct tipc_sock *tsk = (struct tipc_sock *)data; - struct sock *sk = &tsk->sk; - struct sk_buff *skb = NULL; - u32 peer_port, peer_node; + struct sock *sk = from_timer(sk, t, sk_timer); + struct tipc_sock *tsk = tipc_sk(sk); + u32 peer_port = tsk_peer_port(tsk); + u32 peer_node = tsk_peer_node(tsk); u32 own_node = tsk_own_node(tsk); + u32 own_port = tsk->portid; + struct net *net = sock_net(sk); + struct sk_buff *skb = NULL; bh_lock_sock(sk); - if (!tipc_sk_connected(sk)) { - bh_unlock_sock(sk); + if (!tipc_sk_connected(sk)) + goto exit; + + /* Try again later if socket is busy */ + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20); goto exit; } - peer_port = tsk_peer_port(tsk); - peer_node = tsk_peer_node(tsk); if (tsk->probe_unacked) { - if (!sock_owned_by_user(sk)) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), - tsk_peer_port(tsk)); - sk->sk_state_change(sk); - } else { - /* Try again later */ - sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); - } - - bh_unlock_sock(sk); + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, peer_node, peer_port); + sk->sk_state_change(sk); goto exit; } - - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, - INT_H_SIZE, 0, peer_node, own_node, - peer_port, tsk->portid, TIPC_OK); + /* Send new probe */ + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, + peer_node, own_node, peer_port, own_port, + TIPC_OK); tsk->probe_unacked = true; - sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); +exit: bh_unlock_sock(sk); if (skb) - tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); -exit: + tipc_node_xmit_skb(net, skb, peer_node, own_port); sock_put(sk); } @@ -2254,9 +2655,7 @@ void tipc_sk_reinit(struct net *net) rhashtable_walk_enter(&tn->sk_rht, &iter); do { - tsk = ERR_PTR(rhashtable_walk_start(&iter)); - if (IS_ERR(tsk)) - goto walk_stop; + rhashtable_walk_start(&iter); while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2265,7 +2664,7 @@ void tipc_sk_reinit(struct net *net) msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } -walk_stop: + rhashtable_walk_stop(&iter); } while (tsk == ERR_PTR(-EAGAIN)); } @@ -2345,6 +2744,60 @@ void tipc_sk_rht_destroy(struct net *net) rhashtable_destroy(&tn->sk_rht); } +static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) +{ + struct net *net = sock_net(&tsk->sk); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; + struct tipc_name_seq seq; + int rc; + + if (mreq->type < TIPC_RESERVED_TYPES) + return -EACCES; + if (mreq->scope > TIPC_NODE_SCOPE) + return -EINVAL; + if (grp) + return -EACCES; + grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); + if (!grp) + return -ENOMEM; + tsk->group = grp; + msg_set_lookup_scope(hdr, mreq->scope); + msg_set_nametype(hdr, mreq->type); + msg_set_dest_droppable(hdr, true); + seq.type = mreq->type; + seq.lower = mreq->instance; + seq.upper = seq.lower; + tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope); + rc = tipc_sk_publish(tsk, mreq->scope, &seq); + if (rc) { + tipc_group_delete(net, grp); + tsk->group = NULL; + return rc; + } + /* Eliminate any risk that a broadcast overtakes sent JOINs */ + tsk->mc_method.rcast = true; + tsk->mc_method.mandatory = true; + tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf); + return rc; +} + +static int tipc_sk_leave(struct tipc_sock *tsk) +{ + struct net *net = sock_net(&tsk->sk); + struct tipc_group *grp = tsk->group; + struct tipc_name_seq seq; + int scope; + + if (!grp) + return -EINVAL; + tipc_group_self(grp, &seq, &scope); + tipc_group_delete(net, grp); + tsk->group = NULL; + tipc_sk_withdraw(tsk, scope, &seq); + return 0; +} + /** * tipc_setsockopt - set socket option * @sock: socket structure @@ -2363,6 +2816,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group_req mreq; u32 value = 0; int res = 0; @@ -2378,9 +2832,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, case TIPC_CONN_TIMEOUT: if (ol < sizeof(value)) return -EINVAL; - res = get_user(value, (u32 __user *)ov); - if (res) - return res; + if (get_user(value, (u32 __user *)ov)) + return -EFAULT; + break; + case TIPC_GROUP_JOIN: + if (ol < sizeof(mreq)) + return -EINVAL; + if (copy_from_user(&mreq, ov, sizeof(mreq))) + return -EFAULT; break; default: if (ov || ol) @@ -2413,6 +2872,12 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, tsk->mc_method.rcast = true; tsk->mc_method.mandatory = true; break; + case TIPC_GROUP_JOIN: + res = tipc_sk_join(tsk, &mreq); + break; + case TIPC_GROUP_LEAVE: + res = tipc_sk_leave(tsk); + break; default: res = -EINVAL; } @@ -2440,7 +2905,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - int len; + struct tipc_name_seq seq; + int len, scope; u32 value; int res; @@ -2474,6 +2940,12 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, case TIPC_SOCK_RECVQ_DEPTH: value = skb_queue_len(&sk->sk_receive_queue); break; + case TIPC_GROUP_JOIN: + seq.type = 0; + if (tsk->group) + tipc_group_self(tsk->group, &seq, &scope); + value = seq.type; + break; default: res = -EINVAL; } diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index be3d9e3183dc..68e26470c516 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -118,24 +118,28 @@ void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, - u32 node, int must) + u32 node, u32 scope, int must) { + u32 filter = htohl(sub->evt.s.filter, sub->swap); struct tipc_name_seq seq; tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; - if (!must && - !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS)) + if (!must && !(filter & TIPC_SUB_PORTS)) + return; + if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE) + return; + if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE) return; tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, node); } -static void tipc_subscrp_timeout(unsigned long data) +static void tipc_subscrp_timeout(struct timer_list *t) { - struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscription *sub = from_timer(sub, t, timer); struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(&subscriber->lock); @@ -285,29 +289,30 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net, return sub; } -static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, - struct tipc_subscriber *subscriber, int swap) +static int tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, + struct tipc_subscriber *subscriber, int swap, + bool status) { - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_subscription *sub = NULL; u32 timeout; sub = tipc_subscrp_create(net, s, swap); if (!sub) - return tipc_conn_terminate(tn->topsrv, subscriber->conid); + return -1; spin_lock_bh(&subscriber->lock); list_add(&sub->subscrp_list, &subscriber->subscrp_list); sub->subscriber = subscriber; - tipc_nametbl_subscribe(sub); + tipc_nametbl_subscribe(sub, status); tipc_subscrb_get(subscriber); spin_unlock_bh(&subscriber->lock); - setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); + timer_setup(&sub->timer, tipc_subscrp_timeout, 0); timeout = htohl(sub->evt.s.timeout, swap); if (timeout != TIPC_WAIT_FOREVER) mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); + return 0; } /* Handle one termination request for the subscriber */ @@ -317,12 +322,13 @@ static void tipc_subscrb_release_cb(int conid, void *usr_data) } /* Handle one request to create a new subscription for the subscriber */ -static void tipc_subscrb_rcv_cb(struct net *net, int conid, - struct sockaddr_tipc *addr, void *usr_data, - void *buf, size_t len) +static int tipc_subscrb_rcv_cb(struct net *net, int conid, + struct sockaddr_tipc *addr, void *usr_data, + void *buf, size_t len) { struct tipc_subscriber *subscriber = usr_data; struct tipc_subscr *s = (struct tipc_subscr *)buf; + bool status; int swap; /* Determine subscriber's endianness */ @@ -332,10 +338,11 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, /* Detect & process a subscription cancellation request */ if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { s->filter &= ~htohl(TIPC_SUB_CANCEL, swap); - return tipc_subscrp_cancel(s, subscriber); + tipc_subscrp_cancel(s, subscriber); + return 0; } - - tipc_subscrp_subscribe(net, s, subscriber, swap); + status = !(s->filter & htohl(TIPC_SUB_NO_STATUS, swap)); + return tipc_subscrp_subscribe(net, s, subscriber, swap, status); } /* Handle one request to establish a new subscriber */ diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ee52957dc952..f3edca775d9f 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -71,7 +71,7 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, u32 found_upper); void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, - u32 port_ref, u32 node, int must); + u32 port_ref, u32 node, u32 scope, int must); void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, struct tipc_name_seq *out); u32 tipc_subscrp_convert_seq_type(u32 type, int swap); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ecca64fc6a6f..3deabcab4882 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -371,10 +371,6 @@ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) goto rcu_out; } - tipc_rcv(sock_net(sk), skb, b); - rcu_read_unlock(); - return 0; - rcu_out: rcu_read_unlock(); out: |