diff options
author | David S. Miller <davem@davemloft.net> | 2018-04-19 13:05:17 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-04-19 13:05:17 -0400 |
commit | 50f694a62f670cabf14ff38f9251912864a64846 (patch) | |
tree | cc1b3d9495eeab4743e7086ff22a926854d79823 | |
parent | 415787d7799f4fccbe8d49cb0b8e5811be6b0389 (diff) | |
parent | feb5f2ec646483fb66f9ad7218b1aad2a93a2a5c (diff) | |
download | linux-50f694a62f670cabf14ff38f9251912864a64846.tar.bz2 |
Merge branch 'TCP-data-delivery-and-ECN-stats-tracking'
Yuchung Cheng says:
====================
tracking TCP data delivery and ECN stats
This patch series improve tracking the data delivery status
1. minor improvement on SYN data
2. accounting bytes delivered with CE marks
3. exporting the delivery stats to applications
s.t. users can get better sense of TCP performance at per host,
per connection, and even per application message level.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/tcp.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/snmp.h | 2 | ||||
-rw-r--r-- | include/uapi/linux/tcp.h | 5 | ||||
-rw-r--r-- | net/ipv4/proc.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 8 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 33 |
6 files changed, 45 insertions, 6 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8f4c54986f97..20585d5c4e1c 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -281,6 +281,7 @@ struct tcp_sock { * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ + u32 delivered_ce; /* Like the above but only ECE marked packets */ u32 lost; /* Total data packets lost incl. rexmits */ u32 app_limited; /* limited until "delivered" reaches this val */ u64 first_tx_mstamp; /* start of window send phase */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 33a70ece462f..d02e859301ff 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -276,6 +276,8 @@ enum LINUX_MIB_TCPKEEPALIVE, /* TCPKeepAlive */ LINUX_MIB_TCPMTUPFAIL, /* TCPMTUPFail */ LINUX_MIB_TCPMTUPSUCCESS, /* TCPMTUPSuccess */ + LINUX_MIB_TCPDELIVERED, /* TCPDelivered */ + LINUX_MIB_TCPDELIVEREDCE, /* TCPDeliveredCE */ __LINUX_MIB_MAX }; diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 560374c978f9..379b08700a54 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -224,6 +224,9 @@ struct tcp_info { __u64 tcpi_busy_time; /* Time (usec) busy sending data */ __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */ __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */ + + __u32 tcpi_delivered; + __u32 tcpi_delivered_ce; }; /* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */ @@ -244,6 +247,8 @@ enum { TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */ TCP_NLA_CA_STATE, /* ca_state of socket */ TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */ + TCP_NLA_DELIVERED, /* Data pkts delivered incl. out-of-order */ + TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */ }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index a058de677e94..261b71d0ccc5 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -296,6 +296,8 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE), SNMP_MIB_ITEM("TCPMTUPFail", LINUX_MIB_TCPMTUPFAIL), SNMP_MIB_ITEM("TCPMTUPSuccess", LINUX_MIB_TCPMTUPSUCCESS), + SNMP_MIB_ITEM("TCPDelivered", LINUX_MIB_TCPDELIVERED), + SNMP_MIB_ITEM("TCPDeliveredCE", LINUX_MIB_TCPDELIVEREDCE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 438fbca96cd3..4022073b0aee 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2559,6 +2559,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_cnt = 0; tp->window_clamp = 0; + tp->delivered_ce = 0; tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; tcp_clear_retrans(tp); @@ -3166,6 +3167,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) rate64 = tcp_compute_delivery_rate(tp); if (rate64) info->tcpi_delivery_rate = rate64; + info->tcpi_delivered = tp->delivered; + info->tcpi_delivered_ce = tp->delivered_ce; unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -3179,7 +3182,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) u32 rate; stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) + - 5 * nla_total_size(sizeof(u32)) + + 7 * nla_total_size(sizeof(u32)) + 3 * nla_total_size(sizeof(u8)), GFP_ATOMIC); if (!stats) return NULL; @@ -3210,9 +3213,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits); nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); nla_put_u32(stats, TCP_NLA_SND_SSTHRESH, tp->snd_ssthresh); + nla_put_u32(stats, TCP_NLA_DELIVERED, tp->delivered); + nla_put_u32(stats, TCP_NLA_DELIVERED_CE, tp->delivered_ce); nla_put_u32(stats, TCP_NLA_SNDQ_SIZE, tp->write_seq - tp->snd_una); nla_put_u8(stats, TCP_NLA_CA_STATE, inet_csk(sk)->icsk_ca_state); + return stats; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f93687f97d80..0396fb919b5d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3496,6 +3496,22 @@ static void tcp_xmit_recovery(struct sock *sk, int rexmit) tcp_xmit_retransmit_queue(sk); } +/* Returns the number of packets newly acked or sacked by the current ACK */ +static u32 tcp_newly_delivered(struct sock *sk, u32 prior_delivered, int flag) +{ + const struct net *net = sock_net(sk); + struct tcp_sock *tp = tcp_sk(sk); + u32 delivered; + + delivered = tp->delivered - prior_delivered; + NET_ADD_STATS(net, LINUX_MIB_TCPDELIVERED, delivered); + if (flag & FLAG_ECE) { + tp->delivered_ce += delivered; + NET_ADD_STATS(net, LINUX_MIB_TCPDELIVEREDCE, delivered); + } + return delivered; +} + /* This routine deals with incoming acks, but not outgoing ones. */ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { @@ -3619,7 +3635,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) sk_dst_confirm(sk); - delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ + delivered = tcp_newly_delivered(sk, delivered, flag); lost = tp->lost - lost; /* freshly marked lost */ rs.is_ack_delayed = !!(flag & FLAG_ACK_MAYBE_DELAYED); tcp_rate_gen(sk, delivered, lost, is_sack_reneg, sack_state.rate); @@ -3629,9 +3645,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ - if (flag & FLAG_DSACKING_ACK) + if (flag & FLAG_DSACKING_ACK) { tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, &rexmit); + tcp_newly_delivered(sk, delivered, flag); + } /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. @@ -3655,6 +3673,7 @@ old_ack: &sack_state); tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, &rexmit); + tcp_newly_delivered(sk, delivered, flag); tcp_xmit_recovery(sk, rexmit); } @@ -5567,9 +5586,12 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, return true; } tp->syn_data_acked = tp->syn_data; - if (tp->syn_data_acked) - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPFASTOPENACTIVE); + if (tp->syn_data_acked) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVE); + /* SYN-data is counted as two separate packets in tcp_ack() */ + if (tp->delivered > 1) + --tp->delivered; + } tcp_fastopen_add_skb(sk, synack); @@ -5901,6 +5923,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) } switch (sk->sk_state) { case TCP_SYN_RECV: + tp->delivered++; /* SYN-ACK delivery isn't tracked in tcp_ack */ if (!tp->srtt_us) tcp_synack_rtt_meas(sk, req); |