diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-27 12:03:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-07-27 12:03:20 -0700 |
commit | 468fc7ed5537615efe671d94248446ac24679773 (patch) | |
tree | 27bc9de792e863d6ec1630927b77ac9e7dabb38a /net/tipc | |
parent | 08fd8c17686c6b09fa410a26d516548dd80ff147 (diff) | |
parent | 36232012344b8db67052432742deaf17f82e70e6 (diff) | |
download | linux-468fc7ed5537615efe671d94248446ac24679773.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) Unified UDP encapsulation offload methods for drivers, from
Alexander Duyck.
2) Make DSA binding more sane, from Andrew Lunn.
3) Support QCA9888 chips in ath10k, from Anilkumar Kolli.
4) Several workqueue usage cleanups, from Bhaktipriya Shridhar.
5) Add XDP (eXpress Data Path), essentially running BPF programs on RX
packets as soon as the device sees them, with the option to mirror
the packet on TX via the same interface. From Brenden Blanco and
others.
6) Allow qdisc/class stats dumps to run lockless, from Eric Dumazet.
7) Add VLAN support to b53 and bcm_sf2, from Florian Fainelli.
8) Simplify netlink conntrack entry layout, from Florian Westphal.
9) Add ipv4 forwarding support to mlxsw spectrum driver, from Ido
Schimmel, Yotam Gigi, and Jiri Pirko.
10) Add SKB array infrastructure and convert tun and macvtap over to it.
From Michael S Tsirkin and Jason Wang.
11) Support qdisc packet injection in pktgen, from John Fastabend.
12) Add neighbour monitoring framework to TIPC, from Jon Paul Maloy.
13) Add NV congestion control support to TCP, from Lawrence Brakmo.
14) Add GSO support to SCTP, from Marcelo Ricardo Leitner.
15) Allow GRO and RPS to function on macsec devices, from Paolo Abeni.
16) Support MPLS over IPV4, from Simon Horman.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1622 commits)
xgene: Fix build warning with ACPI disabled.
be2net: perform temperature query in adapter regardless of its interface state
l2tp: Correctly return -EBADF from pppol2tp_getname.
net/mlx5_core/health: Remove deprecated create_singlethread_workqueue
net: ipmr/ip6mr: update lastuse on entry change
macsec: ensure rx_sa is set when validation is disabled
tipc: dump monitor attributes
tipc: add a function to get the bearer name
tipc: get monitor threshold for the cluster
tipc: make cluster size threshold for monitoring configurable
tipc: introduce constants for tipc address validation
net: neigh: disallow transition to NUD_STALE if lladdr is unchanged in neigh_update()
MAINTAINERS: xgene: Add driver and documentation path
Documentation: dtb: xgene: Add MDIO node
dtb: xgene: Add MDIO node
drivers: net: xgene: ethtool: Use phy_ethtool_gset and sset
drivers: net: xgene: Use exported functions
drivers: net: xgene: Enable MDIO driver
drivers: net: xgene: Add backward compatibility
drivers: net: phy: xgene: Add MDIO driver
...
Diffstat (limited to 'net/tipc')
-rw-r--r-- | net/tipc/Makefile | 2 | ||||
-rw-r--r-- | net/tipc/addr.h | 6 | ||||
-rw-r--r-- | net/tipc/bearer.c | 33 | ||||
-rw-r--r-- | net/tipc/bearer.h | 3 | ||||
-rw-r--r-- | net/tipc/core.c | 1 | ||||
-rw-r--r-- | net/tipc/core.h | 15 | ||||
-rw-r--r-- | net/tipc/discover.c | 5 | ||||
-rw-r--r-- | net/tipc/link.c | 51 | ||||
-rw-r--r-- | net/tipc/monitor.c | 803 | ||||
-rw-r--r-- | net/tipc/monitor.h | 82 | ||||
-rw-r--r-- | net/tipc/netlink.c | 27 | ||||
-rw-r--r-- | net/tipc/netlink.h | 1 | ||||
-rw-r--r-- | net/tipc/node.c | 213 | ||||
-rw-r--r-- | net/tipc/node.h | 5 | ||||
-rw-r--r-- | net/tipc/server.c | 3 | ||||
-rw-r--r-- | net/tipc/udp_media.c | 24 |
16 files changed, 1208 insertions, 66 deletions
diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 57e460be4692..31b9f9c52974 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_TIPC) := tipc.o tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ - name_distr.o subscr.o name_table.o net.o \ + name_distr.o subscr.o monitor.o name_table.o net.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ server.o socket.o diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 93f7c983be33..bebb347803ce 100644 --- a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -43,9 +43,6 @@ #include <net/netns/generic.h> #include "core.h" -#define TIPC_ZONE_MASK 0xff000000u -#define TIPC_CLUSTER_MASK 0xfffff000u - static inline u32 tipc_own_addr(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -60,7 +57,7 @@ static inline u32 tipc_zone_mask(u32 addr) static inline u32 tipc_cluster_mask(u32 addr) { - return addr & TIPC_CLUSTER_MASK; + return addr & TIPC_ZONE_CLUSTER_MASK; } u32 tipc_own_addr(struct net *net); @@ -73,4 +70,5 @@ int tipc_addr_node_valid(u32 addr); int tipc_in_scope(u32 domain, u32 addr); int tipc_addr_scope(u32 domain); char *tipc_addr_string_fill(char *string, u32 addr); + #endif diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a597708ae381..65b1bbf133bd 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -1,7 +1,7 @@ /* * net/tipc/bearer.c: TIPC bearer code * - * Copyright (c) 1996-2006, 2013-2014, Ericsson AB + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB * Copyright (c) 2004-2006, 2010-2013, Wind River Systems * All rights reserved. * @@ -39,6 +39,7 @@ #include "bearer.h" #include "link.h" #include "discover.h" +#include "monitor.h" #include "bcast.h" #include "netlink.h" @@ -170,6 +171,27 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) return NULL; } +/* tipc_bearer_get_name - get the bearer name from its id. + * @net: network namespace + * @name: a pointer to the buffer where the name will be stored. + * @bearer_id: the id to get the name from. + */ +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer *b; + + if (bearer_id >= MAX_BEARERS) + return -EINVAL; + + b = rtnl_dereference(tn->bearer_list[bearer_id]); + if (!b) + return -EINVAL; + + strcpy(name, b->name); + return 0; +} + void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -224,7 +246,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (tipc_addr_domain_valid(disc_domain) && (disc_domain != tn->own_addr)) { if (tipc_in_scope(disc_domain, tn->own_addr)) { - disc_domain = tn->own_addr & TIPC_CLUSTER_MASK; + disc_domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK; res = 0; /* accept any node in own cluster */ } else if (in_own_cluster_exact(net, disc_domain)) res = 0; /* accept specified node in own cluster */ @@ -313,6 +335,10 @@ restart: rcu_assign_pointer(tn->bearer_list[bearer_id], b); if (skb) tipc_bearer_xmit_skb(net, bearer_id, skb, &b->bcast_addr); + + if (tipc_mon_create(net, bearer_id)) + return -ENOMEM; + pr_info("Enabled bearer <%s>, discovery domain %s, priority %u\n", name, tipc_addr_string_fill(addr_string, disc_domain), priority); @@ -363,6 +389,7 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b) tipc_disc_delete(b->link_req); RCU_INIT_POINTER(tn->bearer_list[bearer_id], NULL); kfree_rcu(b, rcu); + tipc_mon_delete(net, bearer_id); } int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, @@ -826,7 +853,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) u32 prio; prio = TIPC_MEDIA_LINK_PRI; - domain = tn->own_addr & TIPC_CLUSTER_MASK; + domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 60e49c3be19c..43757f1f9cb3 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -1,7 +1,7 @@ /* * net/tipc/bearer.h: Include file for TIPC bearer code * - * Copyright (c) 1996-2006, 2013-2014, Ericsson AB + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB * Copyright (c) 2005, 2010-2011, Wind River Systems * All rights reserved. * @@ -197,6 +197,7 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id); struct tipc_media *tipc_media_find(const char *name); void tipc_bearer_reset_all(struct net *net); int tipc_bearer_setup(void); diff --git a/net/tipc/core.c b/net/tipc/core.c index fe1b062c4f18..236b043a4156 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -57,6 +57,7 @@ static int __net_init tipc_init_net(struct net *net) tn->net_id = 4711; tn->own_addr = 0; + tn->mon_threshold = TIPC_DEF_MON_THRESHOLD; get_random_bytes(&tn->random, sizeof(int)); INIT_LIST_HEAD(&tn->node_list); spin_lock_init(&tn->node_list_lock); diff --git a/net/tipc/core.h b/net/tipc/core.h index eff58dc53aa1..a1845fb27d80 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -66,11 +66,13 @@ struct tipc_bc_base; struct tipc_link; struct tipc_name_table; struct tipc_server; +struct tipc_monitor; #define TIPC_MOD_VER "2.0.0" -#define NODE_HTABLE_SIZE 512 -#define MAX_BEARERS 3 +#define NODE_HTABLE_SIZE 512 +#define MAX_BEARERS 3 +#define TIPC_DEF_MON_THRESHOLD 32 extern int tipc_net_id __read_mostly; extern int sysctl_tipc_rmem[3] __read_mostly; @@ -88,6 +90,10 @@ struct tipc_net { u32 num_nodes; u32 num_links; + /* Neighbor monitoring list */ + struct tipc_monitor *monitors[MAX_BEARERS]; + int mon_threshold; + /* Bearer list */ struct tipc_bearer __rcu *bearer_list[MAX_BEARERS + 1]; @@ -126,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net) return &tipc_net(net)->node_list; } +static inline unsigned int tipc_hashfn(u32 addr) +{ + return addr & (NODE_HTABLE_SIZE - 1); +} + static inline u16 mod(u16 x) { return x & 0xffffu; diff --git a/net/tipc/discover.c b/net/tipc/discover.c index ad9d477cc242..6b109a808d4c 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -135,9 +135,12 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, u16 caps = msg_node_capabilities(hdr); bool respond = false; bool dupl_addr = false; + int err; - bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr)); + err = bearer->media->msg2addr(bearer, &maddr, msg_media_addr(hdr)); kfree_skb(skb); + if (err) + return; /* Ensure message from node is valid and communication is permitted */ if (net_id != tn->net_id) diff --git a/net/tipc/link.c b/net/tipc/link.c index 7d89f8713d49..877d94f34814 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -42,6 +42,7 @@ #include "name_distr.h" #include "discover.h" #include "netlink.h" +#include "monitor.h" #include <linux/pkt_sched.h> @@ -87,7 +88,6 @@ struct tipc_stats { * @peer_bearer_id: bearer id used by link's peer endpoint * @bearer_id: local bearer id used by link * @tolerance: minimum link continuity loss needed to reset link [in ms] - * @keepalive_intv: link keepalive timer interval * @abort_limit: # of unacknowledged continuity probes needed to reset link * @state: current state of link FSM * @peer_caps: bitmap describing capabilities of peer node @@ -96,6 +96,7 @@ struct tipc_stats { * @pmsg: convenience pointer to "proto_msg" field * @priority: current link priority * @net_plane: current link network plane ('A' through 'H') + * @mon_state: cookie with information needed by link monitor * @backlog_limit: backlog queue congestion thresholds (indexed by importance) * @exp_msg_count: # of tunnelled messages expected during link changeover * @reset_rcv_checkpt: seq # of last acknowledged message at time of link reset @@ -131,7 +132,6 @@ struct tipc_link { u32 peer_bearer_id; u32 bearer_id; u32 tolerance; - unsigned long keepalive_intv; u32 abort_limit; u32 state; u16 peer_caps; @@ -140,6 +140,7 @@ struct tipc_link { char if_name[TIPC_MAX_IF_NAME]; u32 priority; char net_plane; + struct tipc_mon_state mon_state; u16 rst_cnt; /* Failover/synch */ @@ -713,18 +714,25 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) bool setup = false; u16 bc_snt = l->bc_sndlink->snd_nxt - 1; u16 bc_acked = l->bc_rcvlink->acked; - - link_profile_stats(l); + struct tipc_mon_state *mstate = &l->mon_state; switch (l->state) { case LINK_ESTABLISHED: case LINK_SYNCHING: - if (l->silent_intv_cnt > l->abort_limit) - return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); mtyp = STATE_MSG; + link_profile_stats(l); + tipc_mon_get_state(l->net, l->addr, mstate, l->bearer_id); + if (mstate->reset || (l->silent_intv_cnt > l->abort_limit)) + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); state = bc_acked != bc_snt; - probe = l->silent_intv_cnt; - l->silent_intv_cnt++; + state |= l->bc_rcvlink->rcv_unacked; + state |= l->rcv_unacked; + state |= !skb_queue_empty(&l->transmq); + state |= !skb_queue_empty(&l->deferdq); + probe = mstate->probing; + probe |= l->silent_intv_cnt; + if (probe || mstate->monitoring) + l->silent_intv_cnt++; break; case LINK_RESET: setup = l->rst_cnt++ <= 4; @@ -835,6 +843,7 @@ void tipc_link_reset(struct tipc_link *l) l->stats.recv_info = 0; l->stale_count = 0; l->bc_peer_is_up = false; + memset(&l->mon_state, 0, sizeof(l->mon_state)); tipc_link_reset_stats(l); } @@ -1243,6 +1252,9 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, struct tipc_msg *hdr; struct sk_buff_head *dfq = &l->deferdq; bool node_up = link_is_up(l->bc_rcvlink); + struct tipc_mon_state *mstate = &l->mon_state; + int dlen = 0; + void *data; /* Don't send protocol message during reset or link failover */ if (tipc_link_is_blocked(l)) @@ -1255,12 +1267,13 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, rcvgap = buf_seqno(skb_peek(dfq)) - l->rcv_nxt; skb = tipc_msg_create(LINK_PROTOCOL, mtyp, INT_H_SIZE, - TIPC_MAX_IF_NAME, l->addr, + tipc_max_domain_size, l->addr, tipc_own_addr(l->net), 0, 0, 0); if (!skb) return; hdr = buf_msg(skb); + data = msg_data(hdr); msg_set_session(hdr, l->session); msg_set_bearer_id(hdr, l->bearer_id); msg_set_net_plane(hdr, l->net_plane); @@ -1276,14 +1289,18 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (mtyp == STATE_MSG) { msg_set_seq_gap(hdr, rcvgap); - msg_set_size(hdr, INT_H_SIZE); msg_set_probe(hdr, probe); + tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); + msg_set_size(hdr, INT_H_SIZE + dlen); + skb_trim(skb, INT_H_SIZE + dlen); l->stats.sent_states++; l->rcv_unacked = 0; } else { /* RESET_MSG or ACTIVATE_MSG */ msg_set_max_pkt(hdr, l->advertised_mtu); - strcpy(msg_data(hdr), l->if_name); + strcpy(data, l->if_name); + msg_set_size(hdr, INT_H_SIZE + TIPC_MAX_IF_NAME); + skb_trim(skb, INT_H_SIZE + TIPC_MAX_IF_NAME); } if (probe) l->stats.sent_probes++; @@ -1376,7 +1393,9 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 peers_tol = msg_link_tolerance(hdr); u16 peers_prio = msg_linkprio(hdr); u16 rcv_nxt = l->rcv_nxt; + u16 dlen = msg_data_sz(hdr); int mtyp = msg_type(hdr); + void *data; char *if_name; int rc = 0; @@ -1386,6 +1405,10 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, if (tipc_own_addr(l->net) > msg_prevnode(hdr)) l->net_plane = msg_net_plane(hdr); + skb_linearize(skb); + hdr = buf_msg(skb); + data = msg_data(hdr); + switch (mtyp) { case RESET_MSG: @@ -1396,8 +1419,6 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* fall thru' */ case ACTIVATE_MSG: - skb_linearize(skb); - hdr = buf_msg(skb); /* Complete own link name with peer's interface name */ if_name = strrchr(l->name, ':') + 1; @@ -1405,7 +1426,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, break; if (msg_data_sz(hdr) < TIPC_MAX_IF_NAME) break; - strncpy(if_name, msg_data(hdr), TIPC_MAX_IF_NAME); + strncpy(if_name, data, TIPC_MAX_IF_NAME); /* Update own tolerance if peer indicates a non-zero value */ if (in_range(peers_tol, TIPC_MIN_LINK_TOL, TIPC_MAX_LINK_TOL)) @@ -1453,6 +1474,8 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, rc = TIPC_LINK_UP_EVT; break; } + tipc_mon_rcv(l->net, data, dlen, l->addr, + &l->mon_state, l->bearer_id); /* Send NACK if peer has sent pkts we haven't received yet */ if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c new file mode 100644 index 000000000000..be70a57c1ff9 --- /dev/null +++ b/net/tipc/monitor.c @@ -0,0 +1,803 @@ +/* + * net/tipc/monitor.c + * + * Copyright (c) 2016, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <net/genetlink.h> +#include "core.h" +#include "addr.h" +#include "monitor.h" +#include "bearer.h" + +#define MAX_MON_DOMAIN 64 +#define MON_TIMEOUT 120000 +#define MAX_PEER_DOWN_EVENTS 4 + +/* struct tipc_mon_domain: domain record to be transferred between peers + * @len: actual size of domain record + * @gen: current generation of sender's domain + * @ack_gen: most recent generation of self's domain acked by peer + * @member_cnt: number of domain member nodes described in this record + * @up_map: bit map indicating which of the members the sender considers up + * @members: identity of the domain members + */ +struct tipc_mon_domain { + u16 len; + u16 gen; + u16 ack_gen; + u16 member_cnt; + u64 up_map; + u32 members[MAX_MON_DOMAIN]; +}; + +/* struct tipc_peer: state of a peer node and its domain + * @addr: tipc node identity of peer + * @head_map: shows which other nodes currently consider peer 'up' + * @domain: most recent domain record from peer + * @hash: position in hashed lookup list + * @list: position in linked list, in circular ascending order by 'addr' + * @applied: number of reported domain members applied on this monitor list + * @is_up: peer is up as seen from this node + * @is_head: peer is assigned domain head as seen from this node + * @is_local: peer is in local domain and should be continuously monitored + * @down_cnt: - numbers of other peers which have reported this on lost + */ +struct tipc_peer { + u32 addr; + struct tipc_mon_domain *domain; + struct hlist_node hash; + struct list_head list; + u8 applied; + u8 down_cnt; + bool is_up; + bool is_head; + bool is_local; +}; + +struct tipc_monitor { + struct hlist_head peers[NODE_HTABLE_SIZE]; + int peer_cnt; + struct tipc_peer *self; + rwlock_t lock; + struct tipc_mon_domain cache; + u16 list_gen; + u16 dom_gen; + struct net *net; + struct timer_list timer; + unsigned long timer_intv; +}; + +static struct tipc_monitor *tipc_monitor(struct net *net, int bearer_id) +{ + return tipc_net(net)->monitors[bearer_id]; +} + +const int tipc_max_domain_size = sizeof(struct tipc_mon_domain); + +/* dom_rec_len(): actual length of domain record for transport + */ +static int dom_rec_len(struct tipc_mon_domain *dom, u16 mcnt) +{ + return ((void *)&dom->members - (void *)dom) + (mcnt * sizeof(u32)); +} + +/* dom_size() : calculate size of own domain based on number of peers + */ +static int dom_size(int peers) +{ + int i = 0; + + while ((i * i) < peers) + i++; + return i < MAX_MON_DOMAIN ? i : MAX_MON_DOMAIN; +} + +static void map_set(u64 *up_map, int i, unsigned int v) +{ + *up_map &= ~(1ULL << i); + *up_map |= ((u64)v << i); +} + +static int map_get(u64 up_map, int i) +{ + return (up_map & (1 << i)) >> i; +} + +static struct tipc_peer *peer_prev(struct tipc_peer *peer) +{ + return list_last_entry(&peer->list, struct tipc_peer, list); +} + +static struct tipc_peer *peer_nxt(struct tipc_peer *peer) +{ + return list_first_entry(&peer->list, struct tipc_peer, list); +} + +static struct tipc_peer *peer_head(struct tipc_peer *peer) +{ + while (!peer->is_head) + peer = peer_prev(peer); + return peer; +} + +static struct tipc_peer *get_peer(struct tipc_monitor *mon, u32 addr) +{ + struct tipc_peer *peer; + unsigned int thash = tipc_hashfn(addr); + + hlist_for_each_entry(peer, &mon->peers[thash], hash) { + if (peer->addr == addr) + return peer; + } + return NULL; +} + +static struct tipc_peer *get_self(struct net *net, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + + return mon->self; +} + +static inline bool tipc_mon_is_active(struct net *net, struct tipc_monitor *mon) +{ + struct tipc_net *tn = tipc_net(net); + + return mon->peer_cnt > tn->mon_threshold; +} + +/* mon_identify_lost_members() : - identify amd mark potentially lost members + */ +static void mon_identify_lost_members(struct tipc_peer *peer, + struct tipc_mon_domain *dom_bef, + int applied_bef) +{ + struct tipc_peer *member = peer; + struct tipc_mon_domain *dom_aft = peer->domain; + int applied_aft = peer->applied; + int i; + + for (i = 0; i < applied_bef; i++) { + member = peer_nxt(member); + + /* Do nothing if self or peer already see member as down */ + if (!member->is_up || !map_get(dom_bef->up_map, i)) + continue; + + /* Loss of local node must be detected by active probing */ + if (member->is_local) + continue; + + /* Start probing if member was removed from applied domain */ + if (!applied_aft || (applied_aft < i)) { + member->down_cnt = 1; + continue; + } + + /* Member loss is confirmed if it is still in applied domain */ + if (!map_get(dom_aft->up_map, i)) + member->down_cnt++; + } +} + +/* mon_apply_domain() : match a peer's domain record against monitor list + */ +static void mon_apply_domain(struct tipc_monitor *mon, + struct tipc_peer *peer) +{ + struct tipc_mon_domain *dom = peer->domain; + struct tipc_peer *member; + u32 addr; + int i; + + if (!dom || !peer->is_up) + return; + + /* Scan across domain members and match against monitor list */ + peer->applied = 0; + member = peer_nxt(peer); + for (i = 0; i < dom->member_cnt; i++) { + addr = dom->members[i]; + if (addr != member->addr) + return; + peer->applied++; + member = peer_nxt(member); + } +} + +/* mon_update_local_domain() : update after peer addition/removal/up/down + */ +static void mon_update_local_domain(struct tipc_monitor *mon) +{ + struct tipc_peer *self = mon->self; + struct tipc_mon_domain *cache = &mon->cache; + struct tipc_mon_domain *dom = self->domain; + struct tipc_peer *peer = self; + u64 prev_up_map = dom->up_map; + u16 member_cnt, i; + bool diff; + + /* Update local domain size based on current size of cluster */ + member_cnt = dom_size(mon->peer_cnt) - 1; + self->applied = member_cnt; + + /* Update native and cached outgoing local domain records */ + dom->len = dom_rec_len(dom, member_cnt); + diff = dom->member_cnt != member_cnt; + dom->member_cnt = member_cnt; + for (i = 0; i < member_cnt; i++) { + peer = peer_nxt(peer); + diff |= dom->members[i] != peer->addr; + dom->members[i] = peer->addr; + map_set(&dom->up_map, i, peer->is_up); + cache->members[i] = htonl(peer->addr); + } + diff |= dom->up_map != prev_up_map; + if (!diff) + return; + dom->gen = ++mon->dom_gen; + cache->len = htons(dom->len); + cache->gen = htons(dom->gen); + cache->member_cnt = htons(member_cnt); + cache->up_map = cpu_to_be64(dom->up_map); + mon_apply_domain(mon, self); +} + +/* mon_update_neighbors() : update preceding neighbors of added/removed peer + */ +static void mon_update_neighbors(struct tipc_monitor *mon, + struct tipc_peer *peer) +{ + int dz, i; + + dz = dom_size(mon->peer_cnt); + for (i = 0; i < dz; i++) { + mon_apply_domain(mon, peer); + peer = peer_prev(peer); + } +} + +/* mon_assign_roles() : reassign peer roles after a network change + * The monitor list is consistent at this stage; i.e., each peer is monitoring + * a set of domain members as matched between domain record and the monitor list + */ +static void mon_assign_roles(struct tipc_monitor *mon, struct tipc_peer *head) +{ + struct tipc_peer *peer = peer_nxt(head); + struct tipc_peer *self = mon->self; + int i = 0; + + for (; peer != self; peer = peer_nxt(peer)) { + peer->is_local = false; + + /* Update domain member */ + if (i++ < head->applied) { + peer->is_head = false; + if (head == self) + peer->is_local = true; + continue; + } + /* Assign next domain head */ + if (!peer->is_up) + continue; + if (peer->is_head) + break; + head = peer; + head->is_head = true; + i = 0; + } + mon->list_gen++; +} + +void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *prev, *head; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer) + goto exit; + prev = peer_prev(peer); + list_del(&peer->list); + hlist_del(&peer->hash); + kfree(peer->domain); + kfree(peer); + mon->peer_cnt--; + head = peer_head(prev); + if (head == self) + mon_update_local_domain(mon); + mon_update_neighbors(mon, prev); + + /* Revert to full-mesh monitoring if we reach threshold */ + if (!tipc_mon_is_active(net, mon)) { + list_for_each_entry(peer, &self->list, list) { + kfree(peer->domain); + peer->domain = NULL; + peer->applied = 0; + } + } + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); +} + +static bool tipc_mon_add_peer(struct tipc_monitor *mon, u32 addr, + struct tipc_peer **peer) +{ + struct tipc_peer *self = mon->self; + struct tipc_peer *cur, *prev, *p; + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + *peer = p; + if (!p) + return false; + p->addr = addr; + + /* Add new peer to lookup list */ + INIT_LIST_HEAD(&p->list); + hlist_add_head(&p->hash, &mon->peers[tipc_hashfn(addr)]); + + /* Sort new peer into iterator list, in ascending circular order */ + prev = self; + list_for_each_entry(cur, &self->list, list) { + if ((addr > prev->addr) && (addr < cur->addr)) + break; + if (((addr < cur->addr) || (addr > prev->addr)) && + (prev->addr > cur->addr)) + break; + prev = cur; + } + list_add_tail(&p->list, &cur->list); + mon->peer_cnt++; + mon_update_neighbors(mon, p); + return true; +} + +void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *head; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer && !tipc_mon_add_peer(mon, addr, &peer)) + goto exit; + peer->is_up = true; + head = peer_head(peer); + if (head == self) + mon_update_local_domain(mon); + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); +} + +void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *head; + struct tipc_mon_domain *dom; + int applied; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer) { + pr_warn("Mon: unknown link %x/%u DOWN\n", addr, bearer_id); + goto exit; + } + applied = peer->applied; + peer->applied = 0; + dom = peer->domain; + peer->domain = NULL; + if (peer->is_head) + mon_identify_lost_members(peer, dom, applied); + kfree(dom); + peer->is_up = false; + peer->is_head = false; + peer->is_local = false; + peer->down_cnt = 0; + head = peer_head(peer); + if (head == self) + mon_update_local_domain(mon); + mon_assign_roles(mon, head); +exit: + write_unlock_bh(&mon->lock); +} + +/* tipc_mon_rcv - process monitor domain event message + */ +void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, + struct tipc_mon_state *state, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_mon_domain *arrv_dom = data; + struct tipc_mon_domain dom_bef; + struct tipc_mon_domain *dom; + struct tipc_peer *peer; + u16 new_member_cnt = ntohs(arrv_dom->member_cnt); + int new_dlen = dom_rec_len(arrv_dom, new_member_cnt); + u16 new_gen = ntohs(arrv_dom->gen); + u16 acked_gen = ntohs(arrv_dom->ack_gen); + bool probing = state->probing; + int i, applied_bef; + + state->probing = false; + if (!dlen) + return; + + /* Sanity check received domain record */ + if ((dlen < new_dlen) || ntohs(arrv_dom->len) != new_dlen) { + pr_warn_ratelimited("Received illegal domain record\n"); + return; + } + + /* Synch generation numbers with peer if link just came up */ + if (!state->synched) { + state->peer_gen = new_gen - 1; + state->acked_gen = acked_gen; + state->synched = true; + } + + if (more(acked_gen, state->acked_gen)) + state->acked_gen = acked_gen; + + /* Drop duplicate unless we are waiting for a probe response */ + if (!more(new_gen, state->peer_gen) && !probing) + return; + + write_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (!peer || !peer->is_up) + goto exit; + + /* Peer is confirmed, stop any ongoing probing */ + peer->down_cnt = 0; + + /* Task is done for duplicate record */ + if (!more(new_gen, state->peer_gen)) + goto exit; + + state->peer_gen = new_gen; + + /* Cache current domain record for later use */ + dom_bef.member_cnt = 0; + dom = peer->domain; + if (dom) + memcpy(&dom_bef, dom, dom->len); + + /* Transform and store received domain record */ + if (!dom || (dom->len < new_dlen)) { + kfree(dom); + dom = kmalloc(new_dlen, GFP_ATOMIC); + peer->domain = dom; + if (!dom) + goto exit; + } + dom->len = new_dlen; + dom->gen = new_gen; + dom->member_cnt = new_member_cnt; + dom->up_map = be64_to_cpu(arrv_dom->up_map); + for (i = 0; i < new_member_cnt; i++) + dom->members[i] = ntohl(arrv_dom->members[i]); + + /* Update peers affected by this domain record */ + applied_bef = peer->applied; + mon_apply_domain(mon, peer); + mon_identify_lost_members(peer, &dom_bef, applied_bef); + mon_assign_roles(mon, peer_head(peer)); +exit: + write_unlock_bh(&mon->lock); +} + +void tipc_mon_prep(struct net *net, void *data, int *dlen, + struct tipc_mon_state *state, int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_mon_domain *dom = data; + u16 gen = mon->dom_gen; + u16 len; + + if (!tipc_mon_is_active(net, mon)) + return; + + /* Send only a dummy record with ack if peer has acked our last sent */ + if (likely(state->acked_gen == gen)) { + len = dom_rec_len(dom, 0); + *dlen = len; + dom->len = htons(len); + dom->gen = htons(gen); + dom->ack_gen = htons(state->peer_gen); + dom->member_cnt = 0; + return; + } + /* Send the full record */ + read_lock_bh(&mon->lock); + len = ntohs(mon->cache.len); + *dlen = len; + memcpy(data, &mon->cache, len); + read_unlock_bh(&mon->lock); + dom->ack_gen = htons(state->peer_gen); +} + +void tipc_mon_get_state(struct net *net, u32 addr, + struct tipc_mon_state *state, + int bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *peer; + + /* Used cached state if table has not changed */ + if (!state->probing && + (state->list_gen == mon->list_gen) && + (state->acked_gen == mon->dom_gen)) + return; + + read_lock_bh(&mon->lock); + peer = get_peer(mon, addr); + if (peer) { + state->probing = state->acked_gen != mon->dom_gen; + state->probing |= peer->down_cnt; + state->reset |= peer->down_cnt >= MAX_PEER_DOWN_EVENTS; + state->monitoring = peer->is_local; + state->monitoring |= peer->is_head; + state->list_gen = mon->list_gen; + } + read_unlock_bh(&mon->lock); +} + +static void mon_timeout(unsigned long m) +{ + struct tipc_monitor *mon = (void *)m; + struct tipc_peer *self; + int best_member_cnt = dom_size(mon->peer_cnt) - 1; + + write_lock_bh(&mon->lock); + self = mon->self; + if (self && (best_member_cnt != self->applied)) { + mon_update_local_domain(mon); + mon_assign_roles(mon, self); + } + write_unlock_bh(&mon->lock); + mod_timer(&mon->timer, jiffies + mon->timer_intv); +} + +int tipc_mon_create(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_monitor *mon; + struct tipc_peer *self; + struct tipc_mon_domain *dom; + + if (tn->monitors[bearer_id]) + return 0; + + mon = kzalloc(sizeof(*mon), GFP_ATOMIC); + self = kzalloc(sizeof(*self), GFP_ATOMIC); + dom = kzalloc(sizeof(*dom), GFP_ATOMIC); + if (!mon || !self || !dom) { + kfree(mon); + kfree(self); + kfree(dom); + return -ENOMEM; + } + tn->monitors[bearer_id] = mon; + rwlock_init(&mon->lock); + mon->net = net; + mon->peer_cnt = 1; + mon->self = self; + self->domain = dom; + self->addr = tipc_own_addr(net); + self->is_up = true; + self->is_head = true; + INIT_LIST_HEAD(&self->list); + setup_timer(&mon->timer, mon_timeout, (unsigned long)mon); + mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); + mod_timer(&mon->timer, jiffies + mon->timer_intv); + return 0; +} + +void tipc_mon_delete(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *self = get_self(net, bearer_id); + struct tipc_peer *peer, *tmp; + + write_lock_bh(&mon->lock); + tn->monitors[bearer_id] = NULL; + list_for_each_entry_safe(peer, tmp, &self->list, list) { + list_del(&peer->list); + hlist_del(&peer->hash); + kfree(peer->domain); + kfree(peer); + } + mon->self = NULL; + write_unlock_bh(&mon->lock); + del_timer_sync(&mon->timer); + kfree(self->domain); + kfree(self); + kfree(mon); +} + +int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) +{ + struct tipc_net *tn = tipc_net(net); + + if (cluster_size > TIPC_CLUSTER_SIZE) + return -EINVAL; + + tn->mon_threshold = cluster_size; + + return 0; +} + +int tipc_nl_monitor_get_threshold(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + return tn->mon_threshold; +} + +int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, struct tipc_nl_msg *msg) +{ + struct tipc_mon_domain *dom = peer->domain; + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_MON_PEER_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON_PEER); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_ADDR, peer->addr)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_APPLIED, peer->applied)) + goto attr_msg_full; + + if (peer->is_up) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_UP)) + goto attr_msg_full; + if (peer->is_local) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_LOCAL)) + goto attr_msg_full; + if (peer->is_head) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_HEAD)) + goto attr_msg_full; + + if (dom) { + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_DOMGEN, dom->gen)) + goto attr_msg_full; + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_MON_PEER_UPMAP, + dom->up_map, TIPC_NLA_MON_PEER_PAD)) + goto attr_msg_full; + if (nla_put(msg->skb, TIPC_NLA_MON_PEER_MEMBERS, + dom->member_cnt * sizeof(u32), &dom->members)) + goto attr_msg_full; + } + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id, u32 *prev_node) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + struct tipc_peer *peer = mon->self; + + if (!mon) + return -EINVAL; + + read_lock_bh(&mon->lock); + do { + if (*prev_node) { + if (peer->addr == *prev_node) + *prev_node = 0; + else + continue; + } + if (__tipc_nl_add_monitor_peer(peer, msg)) { + *prev_node = peer->addr; + read_unlock_bh(&mon->lock); + return -EMSGSIZE; + } + } while ((peer = peer_nxt(peer)) != mon->self); + read_unlock_bh(&mon->lock); + + return 0; +} + +int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id) +{ + struct tipc_monitor *mon = tipc_monitor(net, bearer_id); + char bearer_name[TIPC_MAX_BEARER_NAME]; + struct nlattr *attrs; + void *hdr; + int ret; + + ret = tipc_bearer_get_name(net, bearer_name, bearer_id); + if (ret || !mon) + return -EINVAL; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_MON_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + if (!attrs) + goto msg_full; + + read_lock_bh(&mon->lock); + if (nla_put_u32(msg->skb, TIPC_NLA_MON_REF, bearer_id)) + goto attr_msg_full; + if (tipc_mon_is_active(net, mon)) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_ACTIVE)) + goto attr_msg_full; + if (nla_put_string(msg->skb, TIPC_NLA_MON_BEARER_NAME, bearer_name)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEERCNT, mon->peer_cnt)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_LISTGEN, mon->list_gen)) + goto attr_msg_full; + + read_unlock_bh(&mon->lock); + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + read_unlock_bh(&mon->lock); + + return -EMSGSIZE; +} diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h new file mode 100644 index 000000000000..2a21b93e0d04 --- /dev/null +++ b/net/tipc/monitor.h @@ -0,0 +1,82 @@ +/* + * net/tipc/monitor.h + * + * Copyright (c) 2015, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_MONITOR_H +#define _TIPC_MONITOR_H + +#include "netlink.h" + +/* struct tipc_mon_state: link instance's cache of monitor list and domain state + * @list_gen: current generation of this node's monitor list + * @gen: current generation of this node's local domain + * @peer_gen: most recent domain generation received from peer + * @acked_gen: most recent generation of self's domain acked by peer + * @monitoring: this peer endpoint should continuously monitored + * @probing: peer endpoint should be temporarily probed for potential loss + * @synched: domain record's generation has been synched with peer after reset + */ +struct tipc_mon_state { + u16 list_gen; + u16 peer_gen; + u16 acked_gen; + bool monitoring :1; + bool probing :1; + bool reset :1; + bool synched :1; +}; + +int tipc_mon_create(struct net *net, int bearer_id); +void tipc_mon_delete(struct net *net, int bearer_id); + +void tipc_mon_peer_up(struct net *net, u32 addr, int bearer_id); +void tipc_mon_peer_down(struct net *net, u32 addr, int bearer_id); +void tipc_mon_prep(struct net *net, void *data, int *dlen, + struct tipc_mon_state *state, int bearer_id); +void tipc_mon_rcv(struct net *net, void *data, u16 dlen, u32 addr, + struct tipc_mon_state *state, int bearer_id); +void tipc_mon_get_state(struct net *net, u32 addr, + struct tipc_mon_state *state, + int bearer_id); +void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id); + +int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size); +int tipc_nl_monitor_get_threshold(struct net *net); +int __tipc_nl_add_monitor(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id); +int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, + u32 bearer_id, u32 *prev_node); + +extern const int tipc_max_domain_size; +#endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 56935df2167a..a84daec0afe9 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -52,7 +52,8 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_MEDIA] = { .type = NLA_NESTED, }, [TIPC_NLA_NODE] = { .type = NLA_NESTED, }, [TIPC_NLA_NET] = { .type = NLA_NESTED, }, - [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, } + [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, }, + [TIPC_NLA_MON] = { .type = NLA_NESTED, }, }; const struct nla_policy @@ -61,6 +62,12 @@ tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = { [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED } }; +const struct nla_policy tipc_nl_monitor_policy[TIPC_NLA_MON_MAX + 1] = { + [TIPC_NLA_MON_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_MON_REF] = { .type = NLA_U32 }, + [TIPC_NLA_MON_ACTIVATION_THRESHOLD] = { .type = NLA_U32 }, +}; + const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_SOCK_ADDR] = { .type = NLA_U32 }, @@ -214,7 +221,23 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd = TIPC_NL_NAME_TABLE_GET, .dumpit = tipc_nl_name_table_dump, .policy = tipc_nl_policy, - } + }, + { + .cmd = TIPC_NL_MON_SET, + .doit = tipc_nl_node_set_monitor, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MON_GET, + .doit = tipc_nl_node_get_monitor, + .dumpit = tipc_nl_node_dump_monitor, + .policy = tipc_nl_policy, + }, + { + .cmd = TIPC_NL_MON_PEER_GET, + .dumpit = tipc_nl_node_dump_monitor_peer, + .policy = tipc_nl_policy, + }, }; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index ed1dbcb4afbd..4ba0ad422110 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -55,6 +55,7 @@ extern const struct nla_policy tipc_nl_prop_policy[]; extern const struct nla_policy tipc_nl_bearer_policy[]; extern const struct nla_policy tipc_nl_media_policy[]; extern const struct nla_policy tipc_nl_udp_policy[]; +extern const struct nla_policy tipc_nl_monitor_policy[]; int tipc_netlink_start(void); int tipc_netlink_compat_start(void); diff --git a/net/tipc/node.c b/net/tipc/node.c index 23d4761842a0..21974191e425 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -40,6 +40,7 @@ #include "name_distr.h" #include "socket.h" #include "bcast.h" +#include "monitor.h" #include "discover.h" #include "netlink.h" @@ -205,17 +206,6 @@ u16 tipc_node_get_capabilities(struct net *net, u32 addr) return caps; } -/* - * A trivial power-of-two bitmask technique is used for speed, since this - * operation is done for every incoming TIPC packet. The number of hash table - * entries has been chosen so that no hash chain exceeds 8 nodes and will - * usually be much smaller (typically only a single node). - */ -static unsigned int tipc_hashfn(u32 addr) -{ - return addr & (NODE_HTABLE_SIZE - 1); -} - static void tipc_node_kref_release(struct kref *kref) { struct tipc_node *n = container_of(kref, struct tipc_node, kref); @@ -279,6 +269,7 @@ static void tipc_node_write_unlock(struct tipc_node *n) u32 addr = 0; u32 flags = n->action_flags; u32 link_id = 0; + u32 bearer_id; struct list_head *publ_list; if (likely(!flags)) { @@ -288,6 +279,7 @@ static void tipc_node_write_unlock(struct tipc_node *n) addr = n->addr; link_id = n->link_id; + bearer_id = link_id & 0xffff; publ_list = &n->publ_list; n->action_flags &= ~(TIPC_NOTIFY_NODE_DOWN | TIPC_NOTIFY_NODE_UP | @@ -301,13 +293,16 @@ static void tipc_node_write_unlock(struct tipc_node *n) if (flags & TIPC_NOTIFY_NODE_UP) tipc_named_node_up(net, addr); - if (flags & TIPC_NOTIFY_LINK_UP) + if (flags & TIPC_NOTIFY_LINK_UP) { + tipc_mon_peer_up(net, addr, bearer_id); tipc_nametbl_publish(net, TIPC_LINK_STATE, addr, addr, TIPC_NODE_SCOPE, link_id, addr); - - if (flags & TIPC_NOTIFY_LINK_DOWN) + } + if (flags & TIPC_NOTIFY_LINK_DOWN) { + tipc_mon_peer_down(net, addr, bearer_id); tipc_nametbl_withdraw(net, TIPC_LINK_STATE, addr, link_id, addr); + } } struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) @@ -378,14 +373,13 @@ static void tipc_node_calculate_timer(struct tipc_node *n, struct tipc_link *l) { unsigned long tol = tipc_link_tolerance(l); unsigned long intv = ((tol / 4) > 500) ? 500 : tol / 4; - unsigned long keepalive_intv = msecs_to_jiffies(intv); /* Link with lowest tolerance determines timer interval */ - if (keepalive_intv < n->keepalive_intv) - n->keepalive_intv = keepalive_intv; + if (intv < n->keepalive_intv) + n->keepalive_intv = intv; - /* Ensure link's abort limit corresponds to current interval */ - tipc_link_set_abort_limit(l, tol / jiffies_to_msecs(n->keepalive_intv)); + /* Ensure link's abort limit corresponds to current tolerance */ + tipc_link_set_abort_limit(l, tol / n->keepalive_intv); } static void tipc_node_delete(struct tipc_node *node) @@ -526,7 +520,7 @@ static void tipc_node_timeout(unsigned long data) if (rc & TIPC_LINK_DOWN_EVT) tipc_node_link_down(n, bearer_id, false); } - mod_timer(&n->timer, jiffies + n->keepalive_intv); + mod_timer(&n->timer, jiffies + msecs_to_jiffies(n->keepalive_intv)); } /** @@ -692,6 +686,7 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) struct tipc_link *l = le->link; struct tipc_media_addr *maddr; struct sk_buff_head xmitq; + int old_bearer_id = bearer_id; if (!l) return; @@ -711,6 +706,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_link_fsm_evt(l, LINK_RESET_EVT); } tipc_node_write_unlock(n); + if (delete) + tipc_mon_remove_peer(n->net, n->addr, old_bearer_id); tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } @@ -735,6 +732,7 @@ void tipc_node_check_dest(struct net *net, u32 onode, bool accept_addr = false; bool reset = true; char *if_name; + unsigned long intv; *dupl_addr = false; *respond = false; @@ -840,9 +838,11 @@ void tipc_node_check_dest(struct net *net, u32 onode, le->link = l; n->link_cnt++; tipc_node_calculate_timer(n, l); - if (n->link_cnt == 1) - if (!mod_timer(&n->timer, jiffies + n->keepalive_intv)) + if (n->link_cnt == 1) { + intv = jiffies + msecs_to_jiffies(n->keepalive_intv); + if (!mod_timer(&n->timer, intv)) tipc_node_get(n); + } } memcpy(&le->maddr, maddr, sizeof(*maddr)); exit: @@ -950,7 +950,7 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) state = SELF_UP_PEER_UP; break; case SELF_LOST_CONTACT_EVT: - state = SELF_DOWN_PEER_LEAVING; + state = SELF_DOWN_PEER_DOWN; break; case SELF_ESTABL_CONTACT_EVT: case PEER_LOST_CONTACT_EVT: @@ -969,7 +969,7 @@ static void tipc_node_fsm_evt(struct tipc_node *n, int evt) state = SELF_UP_PEER_UP; break; case PEER_LOST_CONTACT_EVT: - state = SELF_LEAVING_PEER_DOWN; + state = SELF_DOWN_PEER_DOWN; break; case SELF_LOST_CONTACT_EVT: case PEER_ESTABL_CONTACT_EVT: @@ -1928,3 +1928,168 @@ out: return skb->len; } + +int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *attrs[TIPC_NLA_MON_MAX + 1]; + struct net *net = sock_net(skb->sk); + int err; + + if (!info->attrs[TIPC_NLA_MON]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_MON_MAX, + info->attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy); + if (err) + return err; + + if (attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]) { + u32 val; + + val = nla_get_u32(attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]); + err = tipc_nl_monitor_set_threshold(net, val); + if (err) + return err; + } + + return 0; +} + +static int __tipc_nl_add_monitor_prop(struct net *net, struct tipc_nl_msg *msg) +{ + struct nlattr *attrs; + void *hdr; + u32 val; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + 0, TIPC_NL_MON_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + if (!attrs) + goto msg_full; + + val = tipc_nl_monitor_get_threshold(net); + + if (nla_put_u32(msg->skb, TIPC_NLA_MON_ACTIVATION_THRESHOLD, val)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_nl_msg msg; + int err; + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + err = __tipc_nl_add_monitor_prop(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + + return genlmsg_reply(msg.skb, info); +} + +int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + u32 prev_bearer = cb->args[0]; + struct tipc_nl_msg msg; + int err; + int i; + + if (prev_bearer == MAX_BEARERS) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + for (i = prev_bearer; i < MAX_BEARERS; i++) { + prev_bearer = i; + err = __tipc_nl_add_monitor(net, &msg, prev_bearer); + if (err) + goto out; + } + +out: + rtnl_unlock(); + cb->args[0] = prev_bearer; + + return skb->len; +} + +int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + u32 prev_node = cb->args[1]; + u32 bearer_id = cb->args[2]; + int done = cb->args[0]; + struct tipc_nl_msg msg; + int err; + + if (!prev_node) { + struct nlattr **attrs; + struct nlattr *mon[TIPC_NLA_MON_MAX + 1]; + + err = tipc_nlmsg_parse(cb->nlh, &attrs); + if (err) + return err; + + if (!attrs[TIPC_NLA_MON]) + return -EINVAL; + + err = nla_parse_nested(mon, TIPC_NLA_MON_MAX, + attrs[TIPC_NLA_MON], + tipc_nl_monitor_policy); + if (err) + return err; + + if (!mon[TIPC_NLA_MON_REF]) + return -EINVAL; + + bearer_id = nla_get_u32(mon[TIPC_NLA_MON_REF]); + + if (bearer_id >= MAX_BEARERS) + return -EINVAL; + } + + if (done) + return 0; + + msg.skb = skb; + msg.portid = NETLINK_CB(cb->skb).portid; + msg.seq = cb->nlh->nlmsg_seq; + + rtnl_lock(); + err = tipc_nl_add_monitor_peer(net, &msg, bearer_id, &prev_node); + if (!err) + done = 1; + + rtnl_unlock(); + cb->args[0] = done; + cb->args[1] = prev_node; + cb->args[2] = bearer_id; + + return skb->len; +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 8264b3d97dc4..d69fdfcc0ec9 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -78,4 +78,9 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_dump_monitor(struct sk_buff *skb, struct netlink_callback *cb); +int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb, + struct netlink_callback *cb); #endif diff --git a/net/tipc/server.c b/net/tipc/server.c index 272d20a795d5..215849ce453d 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -418,13 +418,12 @@ static struct outqueue_entry *tipc_alloc_entry(void *data, int len) if (!entry) return NULL; - buf = kmalloc(len, GFP_ATOMIC); + buf = kmemdup(data, len, GFP_ATOMIC); if (!buf) { kfree(entry); return NULL; } - memcpy(buf, data, len); entry->iov.iov_base = buf; entry->iov.iov_len = len; diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index c9cf2be3674a..b016c011970b 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -63,7 +63,7 @@ */ struct udp_media_addr { __be16 proto; - __be16 udp_port; + __be16 port; union { struct in_addr ipv4; struct in6_addr ipv6; @@ -108,9 +108,9 @@ static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size) struct udp_media_addr *ua = (struct udp_media_addr *)&a->value; if (ntohs(ua->proto) == ETH_P_IP) - snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port)); + snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->port)); else if (ntohs(ua->proto) == ETH_P_IPV6) - snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port)); + snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->port)); else pr_err("Invalid UDP media address\n"); return 0; @@ -178,8 +178,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, skb->dev = rt->dst.dev; ttl = ip4_dst_hoplimit(&rt->dst); udp_tunnel_xmit_skb(rt, ub->ubsock->sk, skb, src->ipv4.s_addr, - dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, - dst->udp_port, false, true); + dst->ipv4.s_addr, 0, ttl, 0, src->port, + dst->port, false, true); #if IS_ENABLED(CONFIG_IPV6) } else { struct dst_entry *ndst; @@ -196,8 +196,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, ttl = ip6_dst_hoplimit(ndst); err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, ndst->dev, &src->ipv6, - &dst->ipv6, 0, ttl, 0, src->udp_port, - dst->udp_port, false); + &dst->ipv6, 0, ttl, 0, src->port, + dst->port, false); #endif } return err; @@ -292,12 +292,12 @@ err: ip4 = (struct sockaddr_in *)&sa_local; local->proto = htons(ETH_P_IP); - local->udp_port = ip4->sin_port; + local->port = ip4->sin_port; local->ipv4.s_addr = ip4->sin_addr.s_addr; ip4 = (struct sockaddr_in *)&sa_remote; remote->proto = htons(ETH_P_IP); - remote->udp_port = ip4->sin_port; + remote->port = ip4->sin_port; remote->ipv4.s_addr = ip4->sin_addr.s_addr; return 0; @@ -312,13 +312,13 @@ err: return -EINVAL; local->proto = htons(ETH_P_IPV6); - local->udp_port = ip6->sin6_port; + local->port = ip6->sin6_port; memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); ub->ifindex = ip6->sin6_scope_id; ip6 = (struct sockaddr_in6 *)&sa_remote; remote->proto = htons(ETH_P_IPV6); - remote->udp_port = ip6->sin6_port; + remote->port = ip6->sin6_port; memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); return 0; #endif @@ -386,7 +386,7 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, err = -EAFNOSUPPORT; goto err; } - udp_conf.local_udp_port = local.udp_port; + udp_conf.local_udp_port = local.port; err = udp_sock_create(net, &udp_conf, &ub->ubsock); if (err) goto err; |