From 97a6ec4ac021f7fbec05c15a3aa0c4aaf0461af5 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 4 Dec 2017 10:31:41 -0800 Subject: rhashtable: Change rhashtable_walk_start to return void Most callers of rhashtable_walk_start don't care about a resize event which is indicated by a return value of -EAGAIN. So calls to rhashtable_walk_start are wrapped with code to ignore -EAGAIN. Something like this is common: ret = rhashtable_walk_start(rhiter); if (ret && ret != -EAGAIN) goto out; Since zero and -EAGAIN are the only possible return values from the function, this check is pointless. The condition never evaluates to true. This patch changes rhashtable_walk_start to return void. This simplifies code for the callers that ignore -EAGAIN. For the few cases where the caller cares about the resize event, particularly where the table can be walked in multiple parts for netlink or seq file dump, the function rhashtable_walk_start_check has been added that returns -EAGAIN on a resize event. Signed-off-by: Tom Herbert Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/tipc/socket.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 5d18c0caa92b..22c4fd8a9dfe 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2640,9 +2640,7 @@ void tipc_sk_reinit(struct net *net) rhashtable_walk_enter(&tn->sk_rht, &iter); do { - tsk = ERR_PTR(rhashtable_walk_start(&iter)); - if (IS_ERR(tsk)) - goto walk_stop; + rhashtable_walk_start(&iter); while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2651,7 +2649,7 @@ void tipc_sk_reinit(struct net *net) msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } -walk_stop: + rhashtable_walk_stop(&iter); } while (tsk == ERR_PTR(-EAGAIN)); } -- cgit v1.2.3 From 7ad32bcb7855ae8a60a8cf98e1b9da77cfdba4d0 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 8 Jan 2018 21:03:26 +0100 Subject: tipc: create group member event messages when they are needed In the current implementation, a group socket receiving topology events about other members just converts the topology event message into a group event message and stores it until it reaches the right state to issue it to the user. This complicates the code unnecessarily, and becomes impractical when, in the coming commits, we will need to create and issue membership events independently. In this commit, we change this so that we just notice the type and origin of the incoming topology event, and then drop the buffer. Only when it is time to actually send a group event to the user do we explicitly create a new message and send it upwards. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/group.c | 95 +++++++++++++++++++++++++++++++------------------------ net/tipc/group.h | 2 +- net/tipc/socket.c | 3 +- 3 files changed, 56 insertions(+), 44 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/group.c b/net/tipc/group.c index a352e098f0e7..e08b7acc7b2d 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -64,7 +64,6 @@ struct tipc_member { struct rb_node tree_node; struct list_head list; struct list_head small_win; - struct sk_buff *event_msg; struct sk_buff_head deferredq; struct tipc_group *group; u32 node; @@ -632,6 +631,40 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, } } +static void tipc_group_create_event(struct tipc_group *grp, + struct tipc_member *m, + u32 event, u16 seqno, + struct sk_buff_head *inputq) +{ u32 dnode = tipc_own_addr(grp->net); + struct tipc_event evt; + struct sk_buff *skb; + struct tipc_msg *hdr; + + evt.event = event; + evt.found_lower = m->instance; + evt.found_upper = m->instance; + evt.port.ref = m->port; + evt.port.node = m->node; + evt.s.seq.type = grp->type; + evt.s.seq.lower = m->instance; + evt.s.seq.upper = m->instance; + + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_GRP_MEMBER_EVT, + GROUP_H_SIZE, sizeof(evt), dnode, m->node, + grp->portid, m->port, 0); + if (!skb) + return; + + hdr = buf_msg(skb); + msg_set_nametype(hdr, grp->type); + msg_set_grp_evt(hdr, event); + msg_set_dest_droppable(hdr, true); + msg_set_grp_bc_seqno(hdr, seqno); + memcpy(msg_data(hdr), &evt, sizeof(evt)); + TIPC_SKB_CB(skb)->orig_member = m->instance; + __skb_queue_tail(inputq, skb); +} + static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, int mtyp, struct sk_buff_head *xmitq) { @@ -677,7 +710,6 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, u32 node = msg_orignode(hdr); u32 port = msg_origport(hdr); struct tipc_member *m, *pm; - struct tipc_msg *ehdr; u16 remitted, in_flight; if (!grp) @@ -704,9 +736,8 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, *usr_wakeup = true; m->usr_pending = false; tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); - ehdr = buf_msg(m->event_msg); - msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); - __skb_queue_tail(inputq, m->event_msg); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); } list_del_init(&m->small_win); tipc_group_update_member(m, 0); @@ -725,10 +756,9 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, m->state = MBR_LEAVING; return; } - /* Otherwise deliver already received WITHDRAW event */ - ehdr = buf_msg(m->event_msg); - msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); - __skb_queue_tail(inputq, m->event_msg); + /* Otherwise deliver member WITHDRAW event */ + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_syncpt, inputq); return; case GRP_ADV_MSG: if (!m) @@ -797,11 +827,10 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, void tipc_group_member_evt(struct tipc_group *grp, bool *usr_wakeup, int *sk_rcvbuf, - struct sk_buff *skb, + struct tipc_msg *hdr, struct sk_buff_head *inputq, struct sk_buff_head *xmitq) { - struct tipc_msg *hdr = buf_msg(skb); struct tipc_event *evt = (void *)msg_data(hdr); u32 instance = evt->found_lower; u32 node = evt->port.node; @@ -813,21 +842,12 @@ void tipc_group_member_evt(struct tipc_group *grp, u32 self; if (!grp) - goto drop; + return; net = grp->net; self = tipc_own_addr(net); if (!grp->loopback && node == self && port == grp->portid) - goto drop; - - /* Convert 
message before delivery to user */ - msg_set_hdr_sz(hdr, GROUP_H_SIZE); - msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); - msg_set_type(hdr, TIPC_GRP_MEMBER_EVT); - msg_set_origport(hdr, port); - msg_set_orignode(hdr, node); - msg_set_nametype(hdr, grp->type); - msg_set_grp_evt(hdr, event); + return; m = tipc_group_find_member(grp, node, port); @@ -836,59 +856,52 @@ void tipc_group_member_evt(struct tipc_group *grp, m = tipc_group_create_member(grp, node, port, MBR_DISCOVERED); if (!m) - goto drop; + return; + + m->instance = instance; /* Hold back event if JOIN message not yet received */ if (m->state == MBR_DISCOVERED) { - m->event_msg = skb; m->state = MBR_PUBLISHED; } else { - msg_set_grp_bc_seqno(hdr, m->bc_syncpt); - __skb_queue_tail(inputq, skb); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); m->state = MBR_JOINED; *usr_wakeup = true; m->usr_pending = false; } - m->instance = instance; - TIPC_SKB_CB(skb)->orig_member = m->instance; tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); tipc_group_update_member(m, 0); } else if (event == TIPC_WITHDRAWN) { if (!m) - goto drop; - - TIPC_SKB_CB(skb)->orig_member = m->instance; + return; *usr_wakeup = true; m->usr_pending = false; node_up = tipc_node_is_up(net, node); - m->event_msg = NULL; if (node_up) { /* Hold back event if a LEAVE msg should be expected */ if (m->state != MBR_LEAVING) { - m->event_msg = skb; tipc_group_decr_active(grp, m); m->state = MBR_LEAVING; } else { - msg_set_grp_bc_seqno(hdr, m->bc_syncpt); - __skb_queue_tail(inputq, skb); + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_syncpt, inputq); } } else { if (m->state != MBR_LEAVING) { tipc_group_decr_active(grp, m); m->state = MBR_LEAVING; - msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_rcv_nxt, inputq); } else { - msg_set_grp_bc_seqno(hdr, m->bc_syncpt); + tipc_group_create_event(grp, m, TIPC_WITHDRAWN, + m->bc_syncpt, inputq); } - __skb_queue_tail(inputq, skb); } list_del_init(&m->list); list_del_init(&m->small_win); } *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); - return; -drop: - kfree_skb(skb); } diff --git a/net/tipc/group.h b/net/tipc/group.h index d525e1cd7de5..5ffffd0121a2 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -54,7 +54,7 @@ void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup, - int *sk_rcvbuf, struct sk_buff *skb, + int *sk_rcvbuf, struct tipc_msg *hdr, struct sk_buff_head *inputq, struct sk_buff_head *xmitq); void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup, diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b51d5cba5094..36744ebef74f 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1933,8 +1933,7 @@ static void tipc_sk_proto_rcv(struct sock *sk, break; case TOP_SRV: tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf, - skb, inputq, xmitq); - skb = NULL; + hdr, inputq, xmitq); break; default: break; -- cgit v1.2.3 From d12d2e12cec2d66eab6cd58f592dad9fd386b97d Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 8 Jan 2018 21:03:28 +0100 Subject: tipc: send out join messages as soon as new member is discovered When a socket is joining a group, we look up in the binding table to find if there are already other members of the group present. This is used for being able to return EAGAIN instead of EHOSTUNREACH if the user proceeds directly to a send attempt. 
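As a point of reference, the user-visible side of that early send attempt looks roughly like the sketch below. This is an illustrative user-space fragment, not part of the patch; the group identity (4711) and the member instances are made up, and the SOL_TIPC fallback define is only there in case the toolchain headers do not provide it. The EAGAIN/EHOSTUNREACH distinction is the one described above.

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/tipc.h>

#ifndef SOL_TIPC
#define SOL_TIPC 271	/* normally provided by the TIPC uapi headers */
#endif

/* Join group 4711 as instance 17, then immediately try an anycast send
 * to member instance 42. Per the text above, -EAGAIN means the member
 * exists but is not yet ready, -EHOSTUNREACH means no such member.
 */
static int early_send_example(void)
{
	struct tipc_group_req req = {
		.type = 4711,			/* made-up group identity */
		.instance = 17,
		.scope = TIPC_CLUSTER_SCOPE,
	};
	struct sockaddr_tipc peer = {
		.family = AF_TIPC,
		.addrtype = TIPC_ADDR_NAME,
		.addr.name.name = { .type = 4711, .instance = 42 },
	};
	char msg[] = "hello";
	int sd = socket(AF_TIPC, SOCK_RDM, 0);

	if (sd < 0)
		return -1;
	if (setsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN, &req, sizeof(req)) < 0)
		return -1;
	if (sendto(sd, msg, strlen(msg), MSG_DONTWAIT,
		   (struct sockaddr *)&peer, sizeof(peer)) < 0) {
		if (errno == EAGAIN)
			printf("member found, not yet ready - try again\n");
		else if (errno == EHOSTUNREACH)
			printf("no matching member in the group\n");
	}
	return sd;
}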
However, the information in the binding table can be used to directly set the created member in state MBR_PUBLISHED and send a JOIN message to the peer, instead of waiting for a topology PUBLISH event to do this. When there are many members in a group, the propagation time for such events can be significant, and we can save time during the join operation if we use the initial lookup result fully. In this commit, we eliminate the member state MBR_DISCOVERED, which has been the result of the initial lookup, and instead go directly to MBR_PUBLISHED, which initiates the setup. After this change, the tipc_member FSM looks as follows: [ASCII art state diagram of the tipc_member FSM, showing the states JOINING, PUBLISHED, JOINED, PENDING, ACTIVE, RECLAIMING, REMITTED and LEAVING, with transitions driven by JOIN and PUBLISH events, LEAVE/WITHDRAW and peer DOWN events, and the advertisement handshake (msg/maxactv, revert/maxactv, RECLAIM, REMIT, adv == ADV_IDLE).] Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/group.c | 100 +++++++++++++++++++++++++++++++------------------- net/tipc/group.h | 4 +- net/tipc/name_table.c | 2 +- net/tipc/socket.c | 4 +- 4 files changed, 68 insertions(+), 42 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/group.c b/net/tipc/group.c index bdc54be9c07e..6ca07f0da60c 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -49,7 +49,6 @@ #define ADV_ACTIVE (ADV_UNIT * 12) enum mbr_state { - MBR_DISCOVERED, MBR_JOINING, MBR_PUBLISHED, MBR_JOINED, @@ -141,7 +140,7 @@ static bool tipc_group_is_receiver(struct tipc_member *m) static bool tipc_group_is_sender(struct tipc_member *m) { - return m && m->state >= MBR_JOINED; + return m && m->state != MBR_JOINING && m->state != MBR_PUBLISHED; } u32 tipc_group_exclude(struct tipc_group *grp) { @@ -184,6 +183,21 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid, return NULL; } +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf) +{ + struct rb_root *tree = &grp->members; + struct tipc_member *m, *tmp; + struct sk_buff_head xmitq; + + skb_queue_head_init(&xmitq); + rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq); + tipc_group_update_member(m, 0); + } + tipc_node_distr_xmit(net, &xmitq); + *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); +} + void tipc_group_delete(struct net *net, struct tipc_group *grp) { struct rb_root *tree = &grp->members; @@ -274,7 +288,7 @@ static void tipc_group_add_to_tree(struct tipc_group *grp, static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, u32 node, u32 port, - int state) + u32 instance, int state) { struct tipc_member *m; @@ -287,6 +301,7 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, m->group =
grp; m->node = node; m->port = port; + m->instance = instance; m->bc_acked = grp->bc_snd_nxt - 1; grp->member_cnt++; tipc_group_add_to_tree(grp, m); @@ -295,9 +310,10 @@ static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, return m; } -void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port) +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance) { - tipc_group_create_member(grp, node, port, MBR_DISCOVERED); + tipc_group_create_member(grp, node, port, instance, MBR_PUBLISHED); } static void tipc_group_delete_member(struct tipc_group *grp, @@ -623,7 +639,6 @@ void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); break; case MBR_RECLAIMING: - case MBR_DISCOVERED: case MBR_JOINING: case MBR_LEAVING: default: @@ -721,26 +736,26 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, case GRP_JOIN_MSG: if (!m) m = tipc_group_create_member(grp, node, port, - MBR_JOINING); + 0, MBR_JOINING); if (!m) return; m->bc_syncpt = msg_grp_bc_syncpt(hdr); m->bc_rcv_nxt = m->bc_syncpt; m->window += msg_adv_win(hdr); - /* Wait until PUBLISH event is received */ - if (m->state == MBR_DISCOVERED) { - m->state = MBR_JOINING; - } else if (m->state == MBR_PUBLISHED) { - m->state = MBR_JOINED; - *usr_wakeup = true; - m->usr_pending = false; - tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); - tipc_group_create_event(grp, m, TIPC_PUBLISHED, - m->bc_syncpt, inputq); - } + /* Wait until PUBLISH event is received if necessary */ + if (m->state != MBR_PUBLISHED) + return; + + /* Member can be taken into service */ + m->state = MBR_JOINED; + *usr_wakeup = true; + m->usr_pending = false; list_del_init(&m->small_win); tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); return; case GRP_LEAVE_MSG: if (!m) @@ -844,30 +859,36 @@ void tipc_group_member_evt(struct tipc_group *grp, m = tipc_group_find_member(grp, node, port); - if (event == TIPC_PUBLISHED) { - if (!m) - m = tipc_group_create_member(grp, node, port, - MBR_DISCOVERED); - if (!m) - return; + switch (event) { + case TIPC_PUBLISHED: + /* Send and wait for arrival of JOIN message if necessary */ + if (!m) { + m = tipc_group_create_member(grp, node, port, instance, + MBR_PUBLISHED); + if (!m) + break; + tipc_group_update_member(m, 0); + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + break; + } - m->instance = instance; + if (m->state != MBR_JOINING) + break; - /* Hold back event if JOIN message not yet received */ - if (m->state == MBR_DISCOVERED) { - m->state = MBR_PUBLISHED; - } else { - tipc_group_create_event(grp, m, TIPC_PUBLISHED, - m->bc_syncpt, inputq); - m->state = MBR_JOINED; - *usr_wakeup = true; - m->usr_pending = false; - } - tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + /* Member can be taken into service */ + m->instance = instance; + m->state = MBR_JOINED; + *usr_wakeup = true; + m->usr_pending = false; + list_del_init(&m->small_win); tipc_group_update_member(m, 0); - } else if (event == TIPC_WITHDRAWN) { + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + tipc_group_create_event(grp, m, TIPC_PUBLISHED, + m->bc_syncpt, inputq); + break; + case TIPC_WITHDRAWN: if (!m) - return; + break; *usr_wakeup = true; m->usr_pending = false; @@ -880,6 +901,9 @@ void tipc_group_member_evt(struct tipc_group *grp, if (!tipc_node_is_up(net, node)) tipc_group_create_event(grp, m, 
TIPC_WITHDRAWN, m->bc_rcv_nxt, inputq); + break; + default: + break; + } *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); } diff --git a/net/tipc/group.h b/net/tipc/group.h index 5ffffd0121a2..dee79477d499 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -44,8 +44,10 @@ struct tipc_msg; struct tipc_group *tipc_group_create(struct net *net, u32 portid, struct tipc_group_req *mreq); +void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf); void tipc_group_delete(struct net *net, struct tipc_group *grp); -void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port); +void tipc_group_add_member(struct tipc_group *grp, u32 node, + u32 port, u32 instance); struct tipc_nlist *tipc_group_dests(struct tipc_group *grp); void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, int *scope); diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index b3829bcf63c7..e04ab72f313c 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -732,7 +732,7 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, list_for_each_entry(p, &info->zone_list, zone_list) { if (!tipc_in_scope(domain, p->node)) continue; - tipc_group_add_member(grp, p->node, p->ref); + tipc_group_add_member(grp, p->node, p->ref, p->lower); } } spin_unlock_bh(&seq->lock); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 36744ebef74f..e3a02f1fcab5 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2757,10 +2757,10 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) tipc_group_delete(net, grp); tsk->group = NULL; } - - /* Eliminate any risk that a broadcast overtakes the sent JOIN */ + /* Eliminate any risk that a broadcast overtakes sent JOINs */ tsk->mc_method.rcast = true; tsk->mc_method.mandatory = true; + tipc_group_join(net, grp, &tsk->sk.sk_rcvbuf); return rc; } -- cgit v1.2.3 From 232d07b74a33b9f5d48516dc1d8ce41723ada593 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 8 Jan 2018 21:03:30 +0100 Subject: tipc: improve groupcast scope handling When a member joins a group, it also indicates a binding scope. This makes it possible to create both node local groups, invisible to other nodes, as well as cluster global groups, visible everywhere. In order to avoid that different members end up having permanently differing views of group size and membership, we must inhibit locally and globally bound members from joining the same group. We do this by using the binding scope as an additional separator between groups. I.e., a member must ignore all membership events from sockets using a different scope than itself, and all lookups for message destinations must require an exact match between the message's lookup scope and the potential target's binding scope. Apart from making it possible to create local groups using the same identity on different nodes, a side effect of this is that it now also becomes possible to create a cluster global group with the same identity across the same nodes, without interfering with the local groups. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- include/uapi/linux/tipc.h | 7 ++-- net/tipc/group.c | 13 ++++--- net/tipc/name_table.c | 40 ++++++++++----------- net/tipc/name_table.h | 4 +-- net/tipc/server.c | 4 +-- net/tipc/server.h | 6 ++-- net/tipc/socket.c | 88 ++++++++++++++++++++++++++++------------------- net/tipc/subscr.c | 10 ++++-- net/tipc/subscr.h | 2 +- 9 files changed, 99 insertions(+), 75 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 35f79d1f8c3a..14bacc7e6cef 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -117,10 +117,9 @@ static inline unsigned int tipc_node(__u32 addr) /* * Publication scopes when binding port names and port name sequences */ - -#define TIPC_ZONE_SCOPE 1 -#define TIPC_CLUSTER_SCOPE 2 -#define TIPC_NODE_SCOPE 3 +#define TIPC_ZONE_SCOPE 1 +#define TIPC_CLUSTER_SCOPE 2 +#define TIPC_NODE_SCOPE 3 /* * Limiting values for messages diff --git a/net/tipc/group.c b/net/tipc/group.c index cf996bd6ec98..1908773c9fca 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -87,7 +87,6 @@ struct tipc_group { int subid; u32 type; u32 instance; - u32 domain; u32 scope; u32 portid; u16 member_cnt; @@ -158,6 +157,8 @@ int tipc_group_size(struct tipc_group *grp) struct tipc_group *tipc_group_create(struct net *net, u32 portid, struct tipc_group_req *mreq) { + u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS; + bool global = mreq->scope != TIPC_NODE_SCOPE; struct tipc_group *grp; u32 type = mreq->type; @@ -171,15 +172,14 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid, grp->members = RB_ROOT; grp->net = net; grp->portid = portid; - grp->domain = addr_domain(net, mreq->scope); grp->type = type; grp->instance = mreq->instance; grp->scope = mreq->scope; grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; - if (tipc_topsrv_kern_subscr(net, portid, type, - TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS, - 0, ~0, &grp->subid)) + filter |= global ? 
TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE; + if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, + filter, &grp->subid)) return grp; kfree(grp); return NULL; @@ -732,6 +732,9 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, if (!grp) return; + if (grp->scope == TIPC_NODE_SCOPE && node != tipc_own_addr(grp->net)) + return; + m = tipc_group_find_member(grp, node, port); switch (msg_type(hdr)) { diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 60af9885f160..64cdd3c302b0 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -328,7 +328,8 @@ static struct publication *tipc_nameseq_insert_publ(struct net *net, list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_subscrp_report_overlap(s, publ->lower, publ->upper, TIPC_PUBLISHED, publ->ref, - publ->node, created_subseq); + publ->node, publ->scope, + created_subseq); } return publ; } @@ -398,7 +399,8 @@ found: list_for_each_entry_safe(s, st, &nseq->subscriptions, nameseq_list) { tipc_subscrp_report_overlap(s, publ->lower, publ->upper, TIPC_WITHDRAWN, publ->ref, - publ->node, removed_subseq); + publ->node, publ->scope, + removed_subseq); } return publ; @@ -435,6 +437,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, sseq->upper, TIPC_PUBLISHED, crs->ref, crs->node, + crs->scope, must_report); must_report = 0; } @@ -598,7 +601,7 @@ not_found: return ref; } -bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, +bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 scope, struct list_head *dsts, int *dstcnt, u32 exclude, bool all) { @@ -608,9 +611,6 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, struct name_seq *seq; struct sub_seq *sseq; - if (!tipc_in_scope(domain, self)) - return false; - *dstcnt = 0; rcu_read_lock(); seq = nametbl_find_seq(net, type); @@ -621,7 +621,7 @@ bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, if (likely(sseq)) { info = sseq->info; list_for_each_entry(publ, &info->zone_list, zone_list) { - if (!tipc_in_scope(domain, publ->node)) + if (publ->scope != scope) continue; if (publ->ref == exclude && publ->node == self) continue; @@ -639,13 +639,14 @@ exit: return !list_empty(dsts); } -int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, - u32 limit, struct list_head *dports) +int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports) { - struct name_seq *seq; - struct sub_seq *sseq; struct sub_seq *sseq_stop; struct name_info *info; + struct publication *p; + struct name_seq *seq; + struct sub_seq *sseq; int res = 0; rcu_read_lock(); @@ -657,15 +658,12 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, sseq = seq->sseqs + nameseq_locate_subseq(seq, lower); sseq_stop = seq->sseqs + seq->first_free; for (; sseq != sseq_stop; sseq++) { - struct publication *publ; - if (sseq->lower > upper) break; - info = sseq->info; - list_for_each_entry(publ, &info->node_list, node_list) { - if (publ->scope <= limit) - tipc_dest_push(dports, 0, publ->ref); + list_for_each_entry(p, &info->node_list, node_list) { + if (p->scope == scope || (!exact && p->scope < scope)) + tipc_dest_push(dports, 0, p->ref); } if (info->cluster_list_size != info->node_list_size) @@ -682,7 +680,7 @@ exit: * - Determines if any node local ports overlap */ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, u32 domain, + 
u32 upper, u32 scope, struct tipc_nlist *nodes) { struct sub_seq *sseq, *stop; @@ -701,7 +699,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, for (; sseq != stop && sseq->lower <= upper; sseq++) { info = sseq->info; list_for_each_entry(publ, &info->zone_list, zone_list) { - if (tipc_in_scope(domain, publ->node)) + if (publ->scope == scope) tipc_nlist_add(nodes, publ->node); } } @@ -713,7 +711,7 @@ exit: /* tipc_nametbl_build_group - build list of communication group members */ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, - u32 type, u32 domain) + u32 type, u32 scope) { struct sub_seq *sseq, *stop; struct name_info *info; @@ -731,7 +729,7 @@ void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, for (; sseq != stop; sseq++) { info = sseq->info; list_for_each_entry(p, &info->zone_list, zone_list) { - if (!tipc_in_scope(domain, p->node)) + if (p->scope != scope) continue; tipc_group_add_member(grp, p->node, p->ref, p->lower); } diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 73a148c85c15..b595d8aa00f0 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -100,8 +100,8 @@ struct name_table { int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); -int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, - u32 limit, struct list_head *dports); +int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, + u32 scope, bool exact, struct list_head *dports); void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, u32 type, u32 domain); void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, diff --git a/net/tipc/server.c b/net/tipc/server.c index 950c54cbcf3a..8ee5e86b7870 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -489,8 +489,8 @@ void tipc_conn_terminate(struct tipc_server *s, int conid) } } -bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, - u32 filter, u32 lower, u32 upper, int *conid) +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid) { struct tipc_subscriber *scbr; struct tipc_subscr sub; diff --git a/net/tipc/server.h b/net/tipc/server.h index ea1effbff23e..17f49ee44cfd 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -41,6 +41,8 @@ #include #define TIPC_SERVER_NAME_LEN 32 +#define TIPC_SUB_CLUSTER_SCOPE 0x20 +#define TIPC_SUB_NODE_SCOPE 0x40 #define TIPC_SUB_NO_STATUS 0x80 /** @@ -84,8 +86,8 @@ struct tipc_server { int tipc_conn_sendmsg(struct tipc_server *s, int conid, struct sockaddr_tipc *addr, void *data, size_t len); -bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, - u32 filter, u32 lower, u32 upper, int *conid); +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, u32 lower, + u32 upper, u32 filter, int *conid); void tipc_topsrv_kern_unsubscr(struct net *net, int conid); /** diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e3a02f1fcab5..b24dab3996c9 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -928,21 +928,22 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, struct list_head *cong_links = &tsk->cong_links; int blks = tsk_blocks(GROUP_H_SIZE + dlen); struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; struct tipc_member *first = NULL; struct tipc_member *mbr = NULL; struct net *net = sock_net(sk); u32 node, port, 
exclude; - u32 type, inst, domain; struct list_head dsts; + u32 type, inst, scope; int lookups = 0; int dstcnt, rc; bool cong; INIT_LIST_HEAD(&dsts); - type = dest->addr.name.name.type; + type = msg_nametype(hdr); inst = dest->addr.name.name.instance; - domain = addr_domain(net, dest->scope); + scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp); while (++lookups < 4) { @@ -950,7 +951,7 @@ static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, /* Look for a non-congested destination member, if any */ while (1) { - if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts, + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, &dstcnt, exclude, false)) return -EHOSTUNREACH; tipc_dest_pop(&dsts, &node, &port); @@ -1079,22 +1080,23 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, { struct sock *sk = sock->sk; DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); - struct tipc_name_seq *seq = &dest->addr.nameseq; struct tipc_sock *tsk = tipc_sk(sk); struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; struct net *net = sock_net(sk); - u32 domain, exclude, dstcnt; + u32 type, inst, scope, exclude; struct list_head dsts; + u32 dstcnt; INIT_LIST_HEAD(&dsts); - if (seq->lower != seq->upper) - return -ENOTSUPP; - - domain = addr_domain(net, dest->scope); + type = msg_nametype(hdr); + inst = dest->addr.name.name.instance; + scope = msg_lookup_scope(hdr); exclude = tipc_group_exclude(grp); - if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain, - &dsts, &dstcnt, exclude, true)) + + if (!tipc_nametbl_lookup(net, type, inst, scope, &dsts, + &dstcnt, exclude, true)) return -EHOSTUNREACH; if (dstcnt == 1) { @@ -1116,24 +1118,29 @@ static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq) { - u32 scope = TIPC_CLUSTER_SCOPE; u32 self = tipc_own_addr(net); + u32 type, lower, upper, scope; struct sk_buff *skb, *_skb; - u32 lower = 0, upper = ~0; - struct sk_buff_head tmpq; u32 portid, oport, onode; + struct sk_buff_head tmpq; struct list_head dports; - struct tipc_msg *msg; - int user, mtyp, hsz; + struct tipc_msg *hdr; + int user, mtyp, hlen; + bool exact; __skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { - msg = buf_msg(skb); - user = msg_user(msg); - mtyp = msg_type(msg); + hdr = buf_msg(skb); + user = msg_user(hdr); + mtyp = msg_type(hdr); + hlen = skb_headroom(skb) + msg_hdr_sz(hdr); + oport = msg_origport(hdr); + onode = msg_orignode(hdr); + type = msg_nametype(hdr); + if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { spin_lock_bh(&inputq->lock); if (skb_peek(arrvq) == skb) { @@ -1144,21 +1151,31 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, spin_unlock_bh(&inputq->lock); continue; } - hsz = skb_headroom(skb) + msg_hdr_sz(msg); - oport = msg_origport(msg); - onode = msg_orignode(msg); - if (onode == self) - scope = TIPC_NODE_SCOPE; - - /* Create destination port list and message clones: */ - if (!msg_in_group(msg)) { - lower = msg_namelower(msg); - upper = msg_nameupper(msg); + + /* Group messages require exact scope match */ + if (msg_in_group(hdr)) { + lower = 0; + upper = ~0; + scope = msg_lookup_scope(hdr); + exact = true; + } else { + /* TIPC_NODE_SCOPE means "any scope" in this context */ + if (onode == self) + scope = TIPC_NODE_SCOPE; + else + scope = 
TIPC_CLUSTER_SCOPE; + exact = false; + lower = msg_namelower(hdr); + upper = msg_nameupper(hdr); } - tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper, - scope, &dports); + + /* Create destination port list: */ + tipc_nametbl_mc_lookup(net, type, lower, upper, + scope, exact, &dports); + + /* Clone message per destination */ while (tipc_dest_pop(&dports, NULL, &portid)) { - _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); + _skb = __pskb_copy(skb, hlen, GFP_ATOMIC); if (_skb) { msg_set_destport(buf_msg(_skb), portid); __skb_queue_tail(&tmpq, _skb); @@ -2731,7 +2748,6 @@ void tipc_sk_rht_destroy(struct net *net) static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) { struct net *net = sock_net(&tsk->sk); - u32 domain = addr_domain(net, mreq->scope); struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq seq; @@ -2739,6 +2755,8 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) if (mreq->type < TIPC_RESERVED_TYPES) return -EACCES; + if (mreq->scope > TIPC_NODE_SCOPE) + return -EINVAL; if (grp) return -EACCES; grp = tipc_group_create(net, tsk->portid, mreq); @@ -2751,7 +2769,7 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) seq.type = mreq->type; seq.lower = mreq->instance; seq.upper = seq.lower; - tipc_nametbl_build_group(net, grp, mreq->type, domain); + tipc_nametbl_build_group(net, grp, mreq->type, mreq->scope); rc = tipc_sk_publish(tsk, mreq->scope, &seq); if (rc) { tipc_group_delete(net, grp); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 1052341a0ea9..44df528ed6ab 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -118,15 +118,19 @@ void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, u32 port_ref, - u32 node, int must) + u32 node, u32 scope, int must) { + u32 filter = htohl(sub->evt.s.filter, sub->swap); struct tipc_name_seq seq; tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; - if (!must && - !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS)) + if (!must && !(filter & TIPC_SUB_PORTS)) + return; + if (filter & TIPC_SUB_CLUSTER_SCOPE && scope == TIPC_NODE_SCOPE) + return; + if (filter & TIPC_SUB_NODE_SCOPE && scope != TIPC_NODE_SCOPE) return; tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ee52957dc952..f3edca775d9f 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -71,7 +71,7 @@ int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, u32 found_upper); void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, u32 found_upper, u32 event, - u32 port_ref, u32 node, int must); + u32 port_ref, u32 node, u32 scope, int must); void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, struct tipc_name_seq *out); u32 tipc_subscrp_convert_seq_type(u32 type, int swap); -- cgit v1.2.3 From eb929a91b213d2a72c5a8b4af9a1acf63bfb8287 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Mon, 8 Jan 2018 21:03:31 +0100 Subject: tipc: improve poll() for group member socket The current criteria for returning POLLOUT from a group member socket is too simplistic. It basically returns POLLOUT as soon as the group has external destinations, something obviously leading to a lot of spinning during destination congestion situations. 
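The POLLOUT criteria discussed here matter because a sender that hits EAGAIN on a group socket is expected to park in poll() until the socket becomes writable again, rather than spinning, roughly as in the sketch below. This is an illustrative user-space fragment, not part of the patch; it assumes 'sd' is a socket that has already joined a group with TIPC_GROUP_JOIN.

#include <errno.h>
#include <poll.h>
#include <sys/types.h>
#include <sys/socket.h>

/* Broadcast 'buf' to all group members on the already-joined socket 'sd',
 * parking in poll() whenever the send side reports congestion.
 */
static ssize_t group_bcast(int sd, const void *buf, size_t len)
{
	struct pollfd pfd = { .fd = sd, .events = POLLOUT };
	ssize_t rc;

	for (;;) {
		/* send() without a destination address is a group broadcast */
		rc = send(sd, buf, len, MSG_DONTWAIT);
		if (rc >= 0 || errno != EAGAIN)
			return rc;
		if (poll(&pfd, 1, -1) < 0)	/* wait for POLLOUT */
			return -1;
	}
}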
At the same time, the internal congestion handling is unnecessarily complex. We now change this as follows. - We introduce an 'open' flag in struct tipc_group. This flag is used only to help poll() get the setting of POLLOUT right, and *not* for congestion handling as such. This means that a user can choose to ignore an EAGAIN for a destination and go on sending messages to other destinations in the group if he wants to. - The flag is set to false every time we return EAGAIN on a send call. - The flag is set to true every time any member, i.e., not necessarily the member that caused EAGAIN, is removed from the small_win list. - We remove the group member 'usr_pending' flag. The size of the send window and presence in the 'small_win' list are sufficient criteria for recognizing congestion. This solution seems to be a reasonable compromise between 'anycast', which is normally not waiting for POLLOUT for a specific destination, and the other three send modes, which are. Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/group.c | 64 +++++++++++++++++++++++++++++++------------------ net/tipc/group.h | 2 +- net/tipc/socket.c | 8 +++---- 3 files changed, 41 insertions(+), 33 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/group.c b/net/tipc/group.c index 1908773c9fca..497ee34bfab9 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -74,7 +74,6 @@ struct tipc_member { u16 bc_rcv_nxt; u16 bc_syncpt; u16 bc_acked; - bool usr_pending; }; struct tipc_group { @@ -96,11 +95,27 @@ struct tipc_group { u16 bc_ackers; bool loopback; bool events; + bool open; }; static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, int mtyp, struct sk_buff_head *xmitq); +bool tipc_group_is_open(struct tipc_group *grp) +{ + return grp->open; +} + +static void tipc_group_open(struct tipc_member *m, bool *wakeup) +{ + *wakeup = false; + if (list_empty(&m->small_win)) + return; + list_del_init(&m->small_win); + m->group->open = true; + *wakeup = true; +} + static void tipc_group_decr_active(struct tipc_group *grp, struct tipc_member *m) { @@ -406,20 +421,20 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, int adv, state; m = tipc_group_find_dest(grp, dnode, dport); - *mbr = m; - if (!m) + if (!tipc_group_is_receiver(m)) { + *mbr = NULL; return false; - if (m->usr_pending) - return true; + } + *mbr = m; + if (m->window >= len) return false; - m->usr_pending = true; + + grp->open = false; /* If not fully advertised, do it now to prevent mutual blocking */ adv = m->advertised; state = m->state; - if (state < MBR_JOINED) - return true; if (state == MBR_JOINED && adv == ADV_IDLE) return true; if (state == MBR_ACTIVE && adv == ADV_ACTIVE) @@ -437,9 +452,10 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len) struct tipc_member *m = NULL; /* If prev bcast was replicast, reject until all receivers have acked */ - if (grp->bc_ackers) + if (grp->bc_ackers) { + grp->open = false; return true; - + } if (list_empty(&grp->small_win)) return false; @@ -754,9 +770,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, /* Member can be taken into service */ m->state = MBR_JOINED; - *usr_wakeup = true; - m->usr_pending = false; - list_del_init(&m->small_win); + tipc_group_open(m, usr_wakeup); tipc_group_update_member(m, 0); tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); tipc_group_create_event(grp, m, TIPC_PUBLISHED, @@ -767,8 +781,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup,
return; m->bc_syncpt = msg_grp_bc_syncpt(hdr); list_del_init(&m->list); - list_del_init(&m->small_win); - *usr_wakeup = true; + tipc_group_open(m, usr_wakeup); tipc_group_decr_active(grp, m); m->state = MBR_LEAVING; tipc_group_create_event(grp, m, TIPC_WITHDRAWN, @@ -778,26 +791,25 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, if (!m) return; m->window += msg_adv_win(hdr); - *usr_wakeup = m->usr_pending; - m->usr_pending = false; - list_del_init(&m->small_win); + tipc_group_open(m, usr_wakeup); return; case GRP_ACK_MSG: if (!m) return; m->bc_acked = msg_grp_bc_acked(hdr); if (--grp->bc_ackers) - break; + return; + list_del_init(&m->small_win); + m->group->open = true; *usr_wakeup = true; - m->usr_pending = false; + tipc_group_update_member(m, 0); return; case GRP_RECLAIM_MSG: if (!m) return; - *usr_wakeup = m->usr_pending; - m->usr_pending = false; tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq); m->window = ADV_IDLE; + tipc_group_open(m, usr_wakeup); return; case GRP_REMIT_MSG: if (!m || m->state != MBR_RECLAIMING) @@ -883,9 +895,7 @@ void tipc_group_member_evt(struct tipc_group *grp, /* Member can be taken into service */ m->instance = instance; m->state = MBR_JOINED; - *usr_wakeup = true; - m->usr_pending = false; - list_del_init(&m->small_win); + tipc_group_open(m, usr_wakeup); tipc_group_update_member(m, 0); tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); tipc_group_create_event(grp, m, TIPC_PUBLISHED, @@ -895,12 +905,10 @@ void tipc_group_member_evt(struct tipc_group *grp, if (!m) break; - *usr_wakeup = true; - m->usr_pending = false; tipc_group_decr_active(grp, m); m->state = MBR_LEAVING; list_del_init(&m->list); - list_del_init(&m->small_win); + tipc_group_open(m, usr_wakeup); /* Only send event if no LEAVE message can be expected */ if (!tipc_node_is_up(net, node)) diff --git a/net/tipc/group.h b/net/tipc/group.h index dee79477d499..f4a596ed9848 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -67,9 +67,9 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack); bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, int len, struct tipc_member **m); bool tipc_group_bc_cong(struct tipc_group *grp, int len); +bool tipc_group_is_open(struct tipc_group *grp); void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, u32 port, struct sk_buff_head *xmitq); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); void tipc_group_update_member(struct tipc_member *m, int len); -int tipc_group_size(struct tipc_group *grp); #endif diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b24dab3996c9..1f236271766c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -715,7 +715,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_group *grp = tsk->group; + struct tipc_group *grp; u32 revents = 0; sock_poll_wait(file, sk_sleep(sk), wait); @@ -736,9 +736,9 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, revents |= POLLIN | POLLRDNORM; break; case TIPC_OPEN: - if (!grp || tipc_group_size(grp)) - if (!tsk->cong_link_cnt) - revents |= POLLOUT; + grp = tsk->group; + if ((!grp || tipc_group_is_open(grp)) && !tsk->cong_link_cnt) + revents |= POLLOUT; if (!tipc_sk_type_connectionless(sk)) break; if (skb_queue_empty(&sk->sk_receive_queue)) -- cgit v1.2.3 From febafc8455fdbb0ba53d596075068a683b75f355 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Wed, 10 Jan 2018 21:08:50 +0100 Subject: 
tipc: fix a potential access after delete in tipc_sk_join() In commit d12d2e12cec2 ("tipc: send out join messages as soon as new member is discovered") we added a call to the function tipc_group_join() without considering the case that the preceding tipc_sk_publish() might have failed, and the group item already deleted. We fix this by returning from tipc_sk_join() directly after the failed tipc_sk_publish(). Reported-by: syzbot+e3eeae78ea88b8d6d858@syzkaller.appspotmail.com Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1f236271766c..f38264db45ae 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2774,6 +2774,7 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) if (rc) { tipc_group_delete(net, grp); tsk->group = NULL; + return rc; } /* Eliminate any risk that a broadcast overtakes sent JOINs */ tsk->mc_method.rcast = true; -- cgit v1.2.3 From e9a034456a8cd766795610aa5065263147e35228 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 12 Jan 2018 20:56:50 +0100 Subject: tipc: fix bug during lookup of multicast destination nodes In commit 232d07b74a33 ("tipc: improve groupcast scope handling") we inadvertently broke non-group multicast transmission when changing the parameter 'domain' to 'scope' in the function tipc_nametbl_lookup_dst_nodes(). We failed to make the corresponding change in the calling function, with the result that the lookup always fails. A closer analysis reveals that this parameter is not needed at all. Non-group multicast is hard coded to use CLUSTER_SCOPE, and in the current implementation this will be delivered to all matching destinations except those which are published with NODE_SCOPE on other nodes. Since such publications will never be visible on the sending node anyway, it makes no sense to discriminate by scope at all. We now remove this parameter altogether. Signed-off-by: Jon Maloy Signed-off-by: David S.
Miller --- net/tipc/name_table.c | 6 ++---- net/tipc/name_table.h | 3 +-- net/tipc/socket.c | 3 +-- 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 64cdd3c302b0..ed0457cc99d6 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -680,8 +680,7 @@ exit: * - Determines if any node local ports overlap */ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, u32 scope, - struct tipc_nlist *nodes) + u32 upper, struct tipc_nlist *nodes) { struct sub_seq *sseq, *stop; struct publication *publ; @@ -699,8 +698,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, for (; sseq != stop && sseq->lower <= upper; sseq++) { info = sseq->info; list_for_each_entry(publ, &info->zone_list, zone_list) { - if (publ->scope == scope) - tipc_nlist_add(nodes, publ->node); + tipc_nlist_add(nodes, publ->node); } } spin_unlock_bh(&seq->lock); diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index b595d8aa00f0..f56e7cb3d436 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -105,8 +105,7 @@ int tipc_nametbl_mc_lookup(struct net *net, u32 type, u32 lower, u32 upper, void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, u32 type, u32 domain); void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, - u32 upper, u32 domain, - struct tipc_nlist *nodes); + u32 upper, struct tipc_nlist *nodes); bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, struct list_head *dsts, int *dstcnt, u32 exclude, bool all); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f38264db45ae..d799e50ff722 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -772,7 +772,6 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct net *net = sock_net(sk); int mtu = tipc_bcast_get_mtu(net); struct tipc_mc_method *method = &tsk->mc_method; - u32 domain = addr_domain(net, TIPC_CLUSTER_SCOPE); struct sk_buff_head pkts; struct tipc_nlist dsts; int rc; @@ -788,7 +787,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, /* Lookup destination nodes */ tipc_nlist_init(&dsts, tipc_own_addr(net)); tipc_nametbl_lookup_dst_nodes(net, seq->type, seq->lower, - seq->upper, domain, &dsts); + seq->upper, &dsts); if (!dsts.local && !dsts.remote) return -EHOSTUNREACH; -- cgit v1.2.3 From 60c2530696320ee6ffe4491c17079fa403790c98 Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Wed, 17 Jan 2018 16:42:46 +0100 Subject: tipc: fix race between poll() and setsockopt() Letting tipc_poll() dereference a socket's pointer to struct tipc_group entails a race risk, as the group item may be deleted in a concurrent tipc_sk_join() or tipc_sk_leave() thread. We now move the 'open' flag in struct tipc_group to struct tipc_sock, and let the former retain only a pointer to the moved field. This will eliminate the race risk. Reported-by: syzbot+799dafde0286795858ac@syzkaller.appspotmail.com Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/group.c | 19 ++++++++----------- net/tipc/group.h | 4 ++-- net/tipc/socket.c | 7 +++---- 3 files changed, 13 insertions(+), 17 deletions(-) (limited to 'net/tipc/socket.c') diff --git a/net/tipc/group.c b/net/tipc/group.c index 497ee34bfab9..122162a31816 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -93,26 +93,21 @@ struct tipc_group { u16 max_active; u16 bc_snd_nxt; u16 bc_ackers; + bool *open; bool loopback; bool events; - bool open; }; static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, int mtyp, struct sk_buff_head *xmitq); -bool tipc_group_is_open(struct tipc_group *grp) -{ - return grp->open; -} - static void tipc_group_open(struct tipc_member *m, bool *wakeup) { *wakeup = false; if (list_empty(&m->small_win)) return; list_del_init(&m->small_win); - m->group->open = true; + *m->group->open = true; *wakeup = true; } @@ -170,7 +165,8 @@ int tipc_group_size(struct tipc_group *grp) } struct tipc_group *tipc_group_create(struct net *net, u32 portid, - struct tipc_group_req *mreq) + struct tipc_group_req *mreq, + bool *group_is_open) { u32 filter = TIPC_SUB_PORTS | TIPC_SUB_NO_STATUS; bool global = mreq->scope != TIPC_NODE_SCOPE; @@ -192,6 +188,7 @@ struct tipc_group *tipc_group_create(struct net *net, u32 portid, grp->scope = mreq->scope; grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; + grp->open = group_is_open; filter |= global ? TIPC_SUB_CLUSTER_SCOPE : TIPC_SUB_NODE_SCOPE; if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, filter, &grp->subid)) @@ -430,7 +427,7 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, if (m->window >= len) return false; - grp->open = false; + *grp->open = false; /* If not fully advertised, do it now to prevent mutual blocking */ adv = m->advertised; @@ -453,7 +450,7 @@ bool tipc_group_bc_cong(struct tipc_group *grp, int len) /* If prev bcast was replicast, reject until all receivers have acked */ if (grp->bc_ackers) { - grp->open = false; + *grp->open = false; return true; } if (list_empty(&grp->small_win)) @@ -800,7 +797,7 @@ void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, if (--grp->bc_ackers) return; list_del_init(&m->small_win); - m->group->open = true; + *m->group->open = true; *usr_wakeup = true; tipc_group_update_member(m, 0); return; diff --git a/net/tipc/group.h b/net/tipc/group.h index f4a596ed9848..5996af6e9f1d 100644 --- a/net/tipc/group.h +++ b/net/tipc/group.h @@ -43,7 +43,8 @@ struct tipc_member; struct tipc_msg; struct tipc_group *tipc_group_create(struct net *net, u32 portid, - struct tipc_group_req *mreq); + struct tipc_group_req *mreq, + bool *group_is_open); void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcv_buf); void tipc_group_delete(struct net *net, struct tipc_group *grp); void tipc_group_add_member(struct tipc_group *grp, u32 node, @@ -67,7 +68,6 @@ void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack); bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, int len, struct tipc_member **m); bool tipc_group_bc_cong(struct tipc_group *grp, int len); -bool tipc_group_is_open(struct tipc_group *grp); void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, u32 port, struct sk_buff_head *xmitq); u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d799e50ff722..473a096b6fba 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -116,6 +116,7 @@ struct 
tipc_sock { struct tipc_mc_method mc_method; struct rcu_head rcu; struct tipc_group *group; + bool group_is_open; }; static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); @@ -715,7 +716,6 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - struct tipc_group *grp; u32 revents = 0; sock_poll_wait(file, sk_sleep(sk), wait); @@ -736,8 +736,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, revents |= POLLIN | POLLRDNORM; break; case TIPC_OPEN: - grp = tsk->group; - if ((!grp || tipc_group_is_open(grp)) && !tsk->cong_link_cnt) + if (tsk->group_is_open && !tsk->cong_link_cnt) revents |= POLLOUT; if (!tipc_sk_type_connectionless(sk)) break; @@ -2758,7 +2757,7 @@ static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) return -EINVAL; if (grp) return -EACCES; - grp = tipc_group_create(net, tsk->portid, mreq); + grp = tipc_group_create(net, tsk->portid, mreq, &tsk->group_is_open); if (!grp) return -ENOMEM; tsk->group = grp; -- cgit v1.2.3
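Taken together, the series above leaves the group API with the following user-space contract: the scope given at join time decides whether the membership is node local or cluster wide, membership events are opted into with TIPC_GROUP_MEMBER_EVTS, and POLLOUT on the joined socket reflects the per-socket 'open' state introduced in the last patch. The sketch below illustrates that contract; it is an editorial example rather than code from the patches, the group identity value is made up, and the SOL_TIPC fallback define is only a precaution in case the toolchain headers do not provide it.

#include <poll.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/tipc.h>

#ifndef SOL_TIPC
#define SOL_TIPC 271	/* normally provided by the TIPC uapi headers */
#endif

int main(void)
{
	struct tipc_group_req req = {
		.type = 4711,				/* made-up group identity */
		.instance = 1,				/* this member's instance */
		.scope = TIPC_CLUSTER_SCOPE,		/* TIPC_NODE_SCOPE for a node-local group */
		.flags = TIPC_GROUP_MEMBER_EVTS,	/* deliver join/leave events */
	};
	struct pollfd pfd = { .events = POLLOUT };
	int sd = socket(AF_TIPC, SOCK_RDM, 0);

	if (sd < 0 ||
	    setsockopt(sd, SOL_TIPC, TIPC_GROUP_JOIN, &req, sizeof(req)) < 0) {
		perror("group join");
		return 1;
	}

	/* POLLOUT here means the group is open for sending: no pending
	 * broadcast acks and no member currently reporting a small window.
	 */
	pfd.fd = sd;
	if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLOUT))
		printf("group %u joined and open for sending\n", req.type);

	/* Leave the group again */
	setsockopt(sd, SOL_TIPC, TIPC_GROUP_LEAVE, NULL, 0);
	close(sd);
	return 0;
}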