diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-20 21:04:47 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-20 21:04:47 -0700 |
commit | 3b59bf081622b6446db77ad06c93fe23677bc533 (patch) | |
tree | 3f4bb5a27c90cc86994a1f6d3c53fbf9208003cb /net/tipc/bcast.c | |
parent | e45836fafe157df137a837093037f741ad8f4c90 (diff) | |
parent | bbdb32cb5b73597386913d052165423b9d736145 (diff) | |
download | linux-3b59bf081622b6446db77ad06c93fe23677bc533.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking merge from David Miller:
"1) Move ixgbe driver over to purely page based buffering on receive.
From Alexander Duyck.
2) Add receive packet steering support to e1000e, from Bruce Allan.
3) Convert TCP MD5 support over to RCU, from Eric Dumazet.
4) Reduce cpu usage in handling out-of-order TCP packets on modern
systems, also from Eric Dumazet.
5) Support the IP{,V6}_UNICAST_IF socket options, making the wine
folks happy, from Erich Hoover.
6) Support VLAN trunking from guests in hyperv driver, from Haiyang
Zhang.
7) Support byte-queue-limtis in r8169, from Igor Maravic.
8) Outline code intended for IP_RECVTOS in IP_PKTOPTIONS existed but
was never properly implemented, Jiri Benc fixed that.
9) 64-bit statistics support in r8169 and 8139too, from Junchang Wang.
10) Support kernel side dump filtering by ctmark in netfilter
ctnetlink, from Pablo Neira Ayuso.
11) Support byte-queue-limits in gianfar driver, from Paul Gortmaker.
12) Add new peek socket options to assist with socket migration, from
Pavel Emelyanov.
13) Add sch_plug packet scheduler whose queue is controlled by
userland daemons using explicit freeze and release commands. From
Shriram Rajagopalan.
14) Fix FCOE checksum offload handling on transmit, from Yi Zou."
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1846 commits)
Fix pppol2tp getsockname()
Remove printk from rds_sendmsg
ipv6: fix incorrent ipv6 ipsec packet fragment
cpsw: Hook up default ndo_change_mtu.
net: qmi_wwan: fix build error due to cdc-wdm dependecy
netdev: driver: ethernet: Add TI CPSW driver
netdev: driver: ethernet: add cpsw address lookup engine support
phy: add am79c874 PHY support
mlx4_core: fix race on comm channel
bonding: send igmp report for its master
fs_enet: Add MPC5125 FEC support and PHY interface selection
net: bpf_jit: fix BPF_S_LDX_B_MSH compilation
net: update the usage of CHECKSUM_UNNECESSARY
fcoe: use CHECKSUM_UNNECESSARY instead of CHECKSUM_PARTIAL on tx
net: do not do gso for CHECKSUM_UNNECESSARY in netif_needs_gso
ixgbe: Fix issues with SR-IOV loopback when flow control is disabled
net/hyperv: Fix the code handling tx busy
ixgbe: fix namespace issues when FCoE/DCB is not enabled
rtlwifi: Remove unused ETH_ADDR_LEN defines
igbvf: Use ETH_ALEN
...
Fix up fairly trivial conflicts in drivers/isdn/gigaset/interface.c and
drivers/net/usb/{Kconfig,qmi_wwan.c} as per David.
Diffstat (limited to 'net/tipc/bcast.c')
-rw-r--r-- | net/tipc/bcast.c | 336 |
1 files changed, 161 insertions, 175 deletions
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 8eb87b11d100..e00441a2092f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -157,39 +157,14 @@ u32 tipc_bclink_get_last_sent(void) return bcl->fsm_msg_cnt; } -/** - * bclink_set_gap - set gap according to contents of current deferred pkt queue - * - * Called with 'node' locked, bc_lock unlocked - */ - -static void bclink_set_gap(struct tipc_node *n_ptr) -{ - struct sk_buff *buf = n_ptr->bclink.deferred_head; - - n_ptr->bclink.gap_after = n_ptr->bclink.gap_to = - mod(n_ptr->bclink.last_in); - if (unlikely(buf != NULL)) - n_ptr->bclink.gap_to = mod(buf_seqno(buf) - 1); -} - -/** - * bclink_ack_allowed - test if ACK or NACK message can be sent at this moment - * - * This mechanism endeavours to prevent all nodes in network from trying - * to ACK or NACK at the same time. - * - * Note: TIPC uses a different trigger to distribute ACKs than it does to - * distribute NACKs, but tries to use the same spacing (divide by 16). - */ - -static int bclink_ack_allowed(u32 n) +static void bclink_update_last_sent(struct tipc_node *node, u32 seqno) { - return (n % TIPC_MIN_LINK_WIN) == tipc_own_tag; + node->bclink.last_sent = less_eq(node->bclink.last_sent, seqno) ? + seqno : node->bclink.last_sent; } -/** +/* * tipc_bclink_retransmit_to - get most recent node to request retransmission * * Called with bc_lock locked @@ -281,7 +256,7 @@ void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked) if (bcbuf_acks(crs) == 0) { bcl->first_out = next; bcl->out_queue_size--; - buf_discard(crs); + kfree_skb(crs); released = 1; } crs = next; @@ -300,140 +275,94 @@ exit: spin_unlock_bh(&bc_lock); } -/** - * bclink_send_ack - unicast an ACK msg +/* + * tipc_bclink_update_link_state - update broadcast link state * * tipc_net_lock and node lock set */ -static void bclink_send_ack(struct tipc_node *n_ptr) +void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent) { - struct tipc_link *l_ptr = n_ptr->active_links[n_ptr->addr & 1]; + struct sk_buff *buf; - if (l_ptr != NULL) - tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0); -} + /* Ignore "stale" link state info */ -/** - * bclink_send_nack- broadcast a NACK msg - * - * tipc_net_lock and node lock set - */ + if (less_eq(last_sent, n_ptr->bclink.last_in)) + return; -static void bclink_send_nack(struct tipc_node *n_ptr) -{ - struct sk_buff *buf; - struct tipc_msg *msg; + /* Update link synchronization state; quit if in sync */ + + bclink_update_last_sent(n_ptr, last_sent); + + if (n_ptr->bclink.last_sent == n_ptr->bclink.last_in) + return; + + /* Update out-of-sync state; quit if loss is still unconfirmed */ + + if ((++n_ptr->bclink.oos_state) == 1) { + if (n_ptr->bclink.deferred_size < (TIPC_MIN_LINK_WIN / 2)) + return; + n_ptr->bclink.oos_state++; + } - if (!less(n_ptr->bclink.gap_after, n_ptr->bclink.gap_to)) + /* Don't NACK if one has been recently sent (or seen) */ + + if (n_ptr->bclink.oos_state & 0x1) return; + /* Send NACK */ + buf = tipc_buf_acquire(INT_H_SIZE); if (buf) { - msg = buf_msg(buf); + struct tipc_msg *msg = buf_msg(buf); + tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, - INT_H_SIZE, n_ptr->addr); + INT_H_SIZE, n_ptr->addr); msg_set_non_seq(msg, 1); msg_set_mc_netid(msg, tipc_net_id); - msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in)); - msg_set_bcgap_after(msg, n_ptr->bclink.gap_after); - msg_set_bcgap_to(msg, n_ptr->bclink.gap_to); - msg_set_bcast_tag(msg, tipc_own_tag); + msg_set_bcast_ack(msg, n_ptr->bclink.last_in); + msg_set_bcgap_after(msg, n_ptr->bclink.last_in); + msg_set_bcgap_to(msg, n_ptr->bclink.deferred_head + ? buf_seqno(n_ptr->bclink.deferred_head) - 1 + : n_ptr->bclink.last_sent); + spin_lock_bh(&bc_lock); tipc_bearer_send(&bcbearer->bearer, buf, NULL); bcl->stats.sent_nacks++; - buf_discard(buf); - - /* - * Ensure we doesn't send another NACK msg to the node - * until 16 more deferred messages arrive from it - * (i.e. helps prevent all nodes from NACK'ing at same time) - */ + spin_unlock_bh(&bc_lock); + kfree_skb(buf); - n_ptr->bclink.nack_sync = tipc_own_tag; + n_ptr->bclink.oos_state++; } } -/** - * tipc_bclink_check_gap - send a NACK if a sequence gap exists +/* + * bclink_peek_nack - monitor retransmission requests sent by other nodes * - * tipc_net_lock and node lock set - */ - -void tipc_bclink_check_gap(struct tipc_node *n_ptr, u32 last_sent) -{ - if (!n_ptr->bclink.supported || - less_eq(last_sent, mod(n_ptr->bclink.last_in))) - return; - - bclink_set_gap(n_ptr); - if (n_ptr->bclink.gap_after == n_ptr->bclink.gap_to) - n_ptr->bclink.gap_to = last_sent; - bclink_send_nack(n_ptr); -} - -/** - * tipc_bclink_peek_nack - process a NACK msg meant for another node + * Delay any upcoming NACK by this node if another node has already + * requested the first message this node is going to ask for. * * Only tipc_net_lock set. */ -static void tipc_bclink_peek_nack(u32 dest, u32 sender_tag, u32 gap_after, u32 gap_to) +static void bclink_peek_nack(struct tipc_msg *msg) { - struct tipc_node *n_ptr = tipc_node_find(dest); - u32 my_after, my_to; + struct tipc_node *n_ptr = tipc_node_find(msg_destnode(msg)); - if (unlikely(!n_ptr || !tipc_node_is_up(n_ptr))) + if (unlikely(!n_ptr)) return; + tipc_node_lock(n_ptr); - /* - * Modify gap to suppress unnecessary NACKs from this node - */ - my_after = n_ptr->bclink.gap_after; - my_to = n_ptr->bclink.gap_to; - - if (less_eq(gap_after, my_after)) { - if (less(my_after, gap_to) && less(gap_to, my_to)) - n_ptr->bclink.gap_after = gap_to; - else if (less_eq(my_to, gap_to)) - n_ptr->bclink.gap_to = n_ptr->bclink.gap_after; - } else if (less_eq(gap_after, my_to)) { - if (less_eq(my_to, gap_to)) - n_ptr->bclink.gap_to = gap_after; - } else { - /* - * Expand gap if missing bufs not in deferred queue: - */ - struct sk_buff *buf = n_ptr->bclink.deferred_head; - u32 prev = n_ptr->bclink.gap_to; - for (; buf; buf = buf->next) { - u32 seqno = buf_seqno(buf); + if (n_ptr->bclink.supported && + (n_ptr->bclink.last_in != n_ptr->bclink.last_sent) && + (n_ptr->bclink.last_in == msg_bcgap_after(msg))) + n_ptr->bclink.oos_state = 2; - if (mod(seqno - prev) != 1) { - buf = NULL; - break; - } - if (seqno == gap_after) - break; - prev = seqno; - } - if (buf == NULL) - n_ptr->bclink.gap_to = gap_after; - } - /* - * Some nodes may send a complementary NACK now: - */ - if (bclink_ack_allowed(sender_tag + 1)) { - if (n_ptr->bclink.gap_to != n_ptr->bclink.gap_after) { - bclink_send_nack(n_ptr); - bclink_set_gap(n_ptr); - } - } tipc_node_unlock(n_ptr); } -/** +/* * tipc_bclink_send_msg - broadcast a packet to all nodes in cluster */ @@ -445,7 +374,7 @@ int tipc_bclink_send_msg(struct sk_buff *buf) if (!bclink->bcast_nodes.count) { res = msg_data_sz(buf_msg(buf)); - buf_discard(buf); + kfree_skb(buf); goto exit; } @@ -460,7 +389,33 @@ exit: return res; } -/** +/* + * bclink_accept_pkt - accept an incoming, in-sequence broadcast packet + * + * Called with both sending node's lock and bc_lock taken. + */ + +static void bclink_accept_pkt(struct tipc_node *node, u32 seqno) +{ + bclink_update_last_sent(node, seqno); + node->bclink.last_in = seqno; + node->bclink.oos_state = 0; + bcl->stats.recv_info++; + + /* + * Unicast an ACK periodically, ensuring that + * all nodes in the cluster don't ACK at the same time + */ + + if (((seqno - tipc_own_addr) % TIPC_MIN_LINK_WIN) == 0) { + tipc_link_send_proto_msg( + node->active_links[node->addr & 1], + STATE_MSG, 0, 0, 0, 0, 0); + bcl->stats.sent_acks++; + } +} + +/* * tipc_bclink_recv_pkt - receive a broadcast packet, and deliver upwards * * tipc_net_lock is read_locked, no other locks set @@ -472,7 +427,7 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) struct tipc_node *node; u32 next_in; u32 seqno; - struct sk_buff *deferred; + int deferred; /* Screen out unwanted broadcast messages */ @@ -487,6 +442,8 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) if (unlikely(!node->bclink.supported)) goto unlock; + /* Handle broadcast protocol message */ + if (unlikely(msg_user(msg) == BCAST_PROTOCOL)) { if (msg_type(msg) != STATE_MSG) goto unlock; @@ -501,89 +458,118 @@ void tipc_bclink_recv_pkt(struct sk_buff *buf) spin_unlock_bh(&bc_lock); } else { tipc_node_unlock(node); - tipc_bclink_peek_nack(msg_destnode(msg), - msg_bcast_tag(msg), - msg_bcgap_after(msg), - msg_bcgap_to(msg)); + bclink_peek_nack(msg); } goto exit; } /* Handle in-sequence broadcast message */ -receive: - next_in = mod(node->bclink.last_in + 1); seqno = msg_seqno(msg); + next_in = mod(node->bclink.last_in + 1); if (likely(seqno == next_in)) { - bcl->stats.recv_info++; - node->bclink.last_in++; - bclink_set_gap(node); - if (unlikely(bclink_ack_allowed(seqno))) { - bclink_send_ack(node); - bcl->stats.sent_acks++; - } +receive: + /* Deliver message to destination */ + if (likely(msg_isdata(msg))) { + spin_lock_bh(&bc_lock); + bclink_accept_pkt(node, seqno); + spin_unlock_bh(&bc_lock); tipc_node_unlock(node); if (likely(msg_mcast(msg))) tipc_port_recv_mcast(buf, NULL); else - buf_discard(buf); + kfree_skb(buf); } else if (msg_user(msg) == MSG_BUNDLER) { + spin_lock_bh(&bc_lock); + bclink_accept_pkt(node, seqno); bcl->stats.recv_bundles++; bcl->stats.recv_bundled += msg_msgcnt(msg); + spin_unlock_bh(&bc_lock); tipc_node_unlock(node); tipc_link_recv_bundle(buf); } else if (msg_user(msg) == MSG_FRAGMENTER) { + int ret = tipc_link_recv_fragment(&node->bclink.defragm, + &buf, &msg); + if (ret < 0) + goto unlock; + spin_lock_bh(&bc_lock); + bclink_accept_pkt(node, seqno); bcl->stats.recv_fragments++; - if (tipc_link_recv_fragment(&node->bclink.defragm, - &buf, &msg)) + if (ret > 0) bcl->stats.recv_fragmented++; + spin_unlock_bh(&bc_lock); tipc_node_unlock(node); tipc_net_route_msg(buf); } else if (msg_user(msg) == NAME_DISTRIBUTOR) { + spin_lock_bh(&bc_lock); + bclink_accept_pkt(node, seqno); + spin_unlock_bh(&bc_lock); tipc_node_unlock(node); tipc_named_recv(buf); } else { + spin_lock_bh(&bc_lock); + bclink_accept_pkt(node, seqno); + spin_unlock_bh(&bc_lock); tipc_node_unlock(node); - buf_discard(buf); + kfree_skb(buf); } buf = NULL; + + /* Determine new synchronization state */ + tipc_node_lock(node); - deferred = node->bclink.deferred_head; - if (deferred && (buf_seqno(deferred) == mod(next_in + 1))) { - buf = deferred; - msg = buf_msg(buf); - node->bclink.deferred_head = deferred->next; - goto receive; - } - } else if (less(next_in, seqno)) { - u32 gap_after = node->bclink.gap_after; - u32 gap_to = node->bclink.gap_to; - - if (tipc_link_defer_pkt(&node->bclink.deferred_head, - &node->bclink.deferred_tail, - buf)) { - node->bclink.nack_sync++; - bcl->stats.deferred_recv++; - if (seqno == mod(gap_after + 1)) - node->bclink.gap_after = seqno; - else if (less(gap_after, seqno) && less(seqno, gap_to)) - node->bclink.gap_to = seqno; + if (unlikely(!tipc_node_is_up(node))) + goto unlock; + + if (node->bclink.last_in == node->bclink.last_sent) + goto unlock; + + if (!node->bclink.deferred_head) { + node->bclink.oos_state = 1; + goto unlock; } + + msg = buf_msg(node->bclink.deferred_head); + seqno = msg_seqno(msg); + next_in = mod(next_in + 1); + if (seqno != next_in) + goto unlock; + + /* Take in-sequence message from deferred queue & deliver it */ + + buf = node->bclink.deferred_head; + node->bclink.deferred_head = buf->next; + node->bclink.deferred_size--; + goto receive; + } + + /* Handle out-of-sequence broadcast message */ + + if (less(next_in, seqno)) { + deferred = tipc_link_defer_pkt(&node->bclink.deferred_head, + &node->bclink.deferred_tail, + buf); + node->bclink.deferred_size += deferred; + bclink_update_last_sent(node, seqno); buf = NULL; - if (bclink_ack_allowed(node->bclink.nack_sync)) { - if (gap_to != gap_after) - bclink_send_nack(node); - bclink_set_gap(node); - } - } else { + } else + deferred = 0; + + spin_lock_bh(&bc_lock); + + if (deferred) + bcl->stats.deferred_recv++; + else bcl->stats.duplicates++; - } + + spin_unlock_bh(&bc_lock); + unlock: tipc_node_unlock(node); exit: - buf_discard(buf); + kfree_skb(buf); } u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr) |