summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Hemminger <shemminger@linux-foundation.org>2007-04-23 22:26:16 -0700
committerDavid S. Miller <davem@sunset.davemloft.net>2007-04-25 22:29:45 -0700
commit164891aadf1721fca4dce473bb0e0998181537c6 (patch)
tree991393ec7306da475cb306fcc7cb084f737ebadc
parent65d1b4a7e73fe0e1f5275ad7d2d3547981480886 (diff)
downloadlinux-164891aadf1721fca4dce473bb0e0998181537c6.tar.bz2
[TCP]: Congestion control API update.
Do some simple changes to make congestion control API faster/cleaner. * use ktime_t rather than timeval * merge rtt sampling into existing ack callback this means one indirect call versus two per ack. * use flags bits to store options/settings Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/skbuff.h5
-rw-r--r--include/net/tcp.h9
-rw-r--r--net/ipv4/tcp_bic.c2
-rw-r--r--net/ipv4/tcp_cong.c14
-rw-r--r--net/ipv4/tcp_cubic.c2
-rw-r--r--net/ipv4/tcp_htcp.c2
-rw-r--r--net/ipv4/tcp_illinois.c16
-rw-r--r--net/ipv4/tcp_input.c25
-rw-r--r--net/ipv4/tcp_lp.c8
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/tcp_vegas.c10
-rw-r--r--net/ipv4/tcp_veno.c10
-rw-r--r--net/ipv4/tcp_westwood.c2
-rw-r--r--net/ipv4/tcp_yeah.c6
-rw-r--r--net/ipv4/tcp_yeah.h7
15 files changed, 65 insertions, 55 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 50f6f6a094cf..2694cb3ca763 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1569,6 +1569,11 @@ static inline void __net_timestamp(struct sk_buff *skb)
skb->tstamp = ktime_get_real();
}
+static inline ktime_t net_timedelta(ktime_t t)
+{
+ return ktime_sub(ktime_get_real(), t);
+}
+
extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 43910fe3c448..a385797f160a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -629,9 +629,12 @@ enum tcp_ca_event {
#define TCP_CA_MAX 128
#define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX)
+#define TCP_CONG_NON_RESTRICTED 0x1
+#define TCP_CONG_RTT_STAMP 0x2
+
struct tcp_congestion_ops {
struct list_head list;
- int non_restricted;
+ unsigned long flags;
/* initialize private data (optional) */
void (*init)(struct sock *sk);
@@ -645,8 +648,6 @@ struct tcp_congestion_ops {
/* do new cwnd calculation (required) */
void (*cong_avoid)(struct sock *sk, u32 ack,
u32 rtt, u32 in_flight, int good_ack);
- /* round trip time sample per acked packet (optional) */
- void (*rtt_sample)(struct sock *sk, u32 usrtt);
/* call before changing ca_state (optional) */
void (*set_state)(struct sock *sk, u8 new_state);
/* call when cwnd event occurs (optional) */
@@ -654,7 +655,7 @@ struct tcp_congestion_ops {
/* new value of cwnd after loss (optional) */
u32 (*undo_cwnd)(struct sock *sk);
/* hook for packet ack accounting (optional) */
- void (*pkts_acked)(struct sock *sk, u32 num_acked);
+ void (*pkts_acked)(struct sock *sk, u32 num_acked, ktime_t last);
/* get info for inet_diag (optional) */
void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c
index 5730333cd0ac..281c9f913257 100644
--- a/net/ipv4/tcp_bic.c
+++ b/net/ipv4/tcp_bic.c
@@ -206,7 +206,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
/* Track delayed acknowledgment ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index ccd88407e0cd..86b26539e54b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -126,7 +126,7 @@ int tcp_set_default_congestion_control(const char *name)
#endif
if (ca) {
- ca->non_restricted = 1; /* default is always allowed */
+ ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */
list_move(&ca->list, &tcp_cong_list);
ret = 0;
}
@@ -181,7 +181,7 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
*buf = '\0';
rcu_read_lock();
list_for_each_entry_rcu(ca, &tcp_cong_list, list) {
- if (!ca->non_restricted)
+ if (!(ca->flags & TCP_CONG_NON_RESTRICTED))
continue;
offs += snprintf(buf + offs, maxlen - offs,
"%s%s",
@@ -212,16 +212,16 @@ int tcp_set_allowed_congestion_control(char *val)
}
}
- /* pass 2 clear */
+ /* pass 2 clear old values */
list_for_each_entry_rcu(ca, &tcp_cong_list, list)
- ca->non_restricted = 0;
+ ca->flags &= ~TCP_CONG_NON_RESTRICTED;
/* pass 3 mark as allowed */
while ((name = strsep(&val, " ")) && *name) {
ca = tcp_ca_find(name);
WARN_ON(!ca);
if (ca)
- ca->non_restricted = 1;
+ ca->flags |= TCP_CONG_NON_RESTRICTED;
}
out:
spin_unlock(&tcp_cong_list_lock);
@@ -256,7 +256,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
if (!ca)
err = -ENOENT;
- else if (!(ca->non_restricted || capable(CAP_NET_ADMIN)))
+ else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || capable(CAP_NET_ADMIN)))
err = -EPERM;
else if (!try_module_get(ca->owner))
@@ -371,8 +371,8 @@ u32 tcp_reno_min_cwnd(const struct sock *sk)
EXPORT_SYMBOL_GPL(tcp_reno_min_cwnd);
struct tcp_congestion_ops tcp_reno = {
+ .flags = TCP_CONG_NON_RESTRICTED,
.name = "reno",
- .non_restricted = 1,
.owner = THIS_MODULE,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_reno_cong_avoid,
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 296845be912b..14224487b16b 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -334,7 +334,7 @@ static void bictcp_state(struct sock *sk, u8 new_state)
/* Track delayed acknowledgment ratio using sliding window
* ratio = (15*ratio + sample) / 16
*/
-static void bictcp_acked(struct sock *sk, u32 cnt)
+static void bictcp_acked(struct sock *sk, u32 cnt, ktime_t last)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c
index 1020eb48d8d1..4ba4a7ae0a85 100644
--- a/net/ipv4/tcp_htcp.c
+++ b/net/ipv4/tcp_htcp.c
@@ -98,7 +98,7 @@ static inline void measure_rtt(struct sock *sk)
}
}
-static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked)
+static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, ktime_t last)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index ae6298600886..8e3165917f72 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -83,9 +83,14 @@ static void tcp_illinois_init(struct sock *sk)
}
/* Measure RTT for each ack. */
-static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt)
+static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
{
struct illinois *ca = inet_csk_ca(sk);
+ u32 rtt;
+
+ ca->acked = pkts_acked;
+
+ rtt = ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC;
/* ignore bogus values, this prevents wraparound in alpha math */
if (rtt > RTT_MAX)
@@ -103,13 +108,6 @@ static void tcp_illinois_rtt_sample(struct sock *sk, u32 rtt)
ca->sum_rtt += rtt;
}
-/* Capture count of packets covered by ack, to adjust for delayed acks */
-static void tcp_illinois_acked(struct sock *sk, u32 pkts_acked)
-{
- struct illinois *ca = inet_csk_ca(sk);
- ca->acked = pkts_acked;
-}
-
/* Maximum queuing delay */
static inline u32 max_delay(const struct illinois *ca)
{
@@ -325,12 +323,12 @@ static void tcp_illinois_info(struct sock *sk, u32 ext,
}
static struct tcp_congestion_ops tcp_illinois = {
+ .flags = TCP_CONG_RTT_STAMP,
.init = tcp_illinois_init,
.ssthresh = tcp_illinois_ssthresh,
.min_cwnd = tcp_reno_min_cwnd,
.cong_avoid = tcp_illinois_cong_avoid,
.set_state = tcp_illinois_state,
- .rtt_sample = tcp_illinois_rtt_sample,
.get_info = tcp_illinois_info,
.pkts_acked = tcp_illinois_acked,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 633389390788..051f0f815f17 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2402,14 +2402,6 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
return acked;
}
-static u32 tcp_usrtt(struct timeval *tv)
-{
- struct timeval now;
-
- do_gettimeofday(&now);
- return (now.tv_sec - tv->tv_sec) * 1000000 + (now.tv_usec - tv->tv_usec);
-}
-
/* Remove acknowledged frames from the retransmission queue. */
static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
{
@@ -2420,9 +2412,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
int acked = 0;
__s32 seq_rtt = -1;
u32 pkts_acked = 0;
- void (*rtt_sample)(struct sock *sk, u32 usrtt)
- = icsk->icsk_ca_ops->rtt_sample;
- struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
+ ktime_t last_ackt = ktime_set(0,0);
while ((skb = tcp_write_queue_head(sk)) &&
skb != tcp_send_head(sk)) {
@@ -2471,7 +2461,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
seq_rtt = -1;
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- skb_get_timestamp(skb, &tv);
+ last_ackt = skb->tstamp;
}
if (sacked & TCPCB_SACKED_ACKED)
tp->sacked_out -= tcp_skb_pcount(skb);
@@ -2484,7 +2474,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
}
} else if (seq_rtt < 0) {
seq_rtt = now - scb->when;
- skb_get_timestamp(skb, &tv);
+ last_ackt = skb->tstamp;
}
tcp_dec_pcount_approx(&tp->fackets_out, skb);
tcp_packets_out_dec(tp, skb);
@@ -2494,13 +2484,14 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
}
if (acked&FLAG_ACKED) {
+ const struct tcp_congestion_ops *ca_ops
+ = inet_csk(sk)->icsk_ca_ops;
+
tcp_ack_update_rtt(sk, acked, seq_rtt);
tcp_ack_packets_out(sk);
- if (rtt_sample && !(acked & FLAG_RETRANS_DATA_ACKED))
- (*rtt_sample)(sk, tcp_usrtt(&tv));
- if (icsk->icsk_ca_ops->pkts_acked)
- icsk->icsk_ca_ops->pkts_acked(sk, pkts_acked);
+ if (ca_ops->pkts_acked)
+ ca_ops->pkts_acked(sk, pkts_acked, last_ackt);
}
#if FASTRETRANS_DEBUG > 0
diff --git a/net/ipv4/tcp_lp.c b/net/ipv4/tcp_lp.c
index f0ebaf0e21cb..b4e062ab24a1 100644
--- a/net/ipv4/tcp_lp.c
+++ b/net/ipv4/tcp_lp.c
@@ -218,7 +218,7 @@ static u32 tcp_lp_owd_calculator(struct sock *sk)
* 3. calc smoothed OWD (SOWD).
* Most ideas come from the original TCP-LP implementation.
*/
-static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
+static void tcp_lp_rtt_sample(struct sock *sk, u32 rtt)
{
struct lp *lp = inet_csk_ca(sk);
s64 mowd = tcp_lp_owd_calculator(sk);
@@ -261,11 +261,13 @@ static void tcp_lp_rtt_sample(struct sock *sk, u32 usrtt)
* newReno in increase case.
* We work it out by following the idea from TCP-LP's paper directly
*/
-static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
+static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked, ktime_t last)
{
struct tcp_sock *tp = tcp_sk(sk);
struct lp *lp = inet_csk_ca(sk);
+ tcp_lp_rtt_sample(sk, ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC);
+
/* calc inference */
if (tcp_time_stamp > tp->rx_opt.rcv_tsecr)
lp->inference = 3 * (tcp_time_stamp - tp->rx_opt.rcv_tsecr);
@@ -312,11 +314,11 @@ static void tcp_lp_pkts_acked(struct sock *sk, u32 num_acked)
}
static struct tcp_congestion_ops tcp_lp = {
+ .flags = TCP_CONG_RTT_STAMP,
.init = tcp_lp_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_lp_cong_avoid,
.min_cwnd = tcp_reno_min_cwnd,
- .rtt_sample = tcp_lp_rtt_sample,
.pkts_acked = tcp_lp_pkts_acked,
.owner = THIS_MODULE,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3a60aea744ae..e70a6840cb64 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -409,7 +409,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
/* If congestion control is doing timestamping, we must
* take such a timestamp before we potentially clone/copy.
*/
- if (icsk->icsk_ca_ops->rtt_sample)
+ if (icsk->icsk_ca_ops->flags & TCP_CONG_RTT_STAMP)
__net_timestamp(skb);
if (likely(clone_it)) {
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index 87e72bef6d08..f4104eeb5f26 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -120,10 +120,13 @@ static void tcp_vegas_init(struct sock *sk)
* o min-filter RTT samples from a much longer window (forever for now)
* to find the propagation delay (baseRTT)
*/
-static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
{
struct vegas *vegas = inet_csk_ca(sk);
- u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
+ u32 vrtt;
+
+ /* Never allow zero rtt or baseRTT */
+ vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
/* Filter to find propagation delay: */
if (vrtt < vegas->baseRTT)
@@ -353,11 +356,12 @@ static void tcp_vegas_get_info(struct sock *sk, u32 ext,
}
static struct tcp_congestion_ops tcp_vegas = {
+ .flags = TCP_CONG_RTT_STAMP,
.init = tcp_vegas_init,
.ssthresh = tcp_reno_ssthresh,
.cong_avoid = tcp_vegas_cong_avoid,
.min_cwnd = tcp_reno_min_cwnd,
- .rtt_sample = tcp_vegas_rtt_calc,
+ .pkts_acked = tcp_vegas_pkts_acked,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
.get_info = tcp_vegas_get_info,
diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c
index ce57bf302f6c..0b50d0607a0e 100644
--- a/net/ipv4/tcp_veno.c
+++ b/net/ipv4/tcp_veno.c
@@ -69,10 +69,13 @@ static void tcp_veno_init(struct sock *sk)
}
/* Do rtt sampling needed for Veno. */
-static void tcp_veno_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_veno_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
{
struct veno *veno = inet_csk_ca(sk);
- u32 vrtt = usrtt + 1; /* Never allow zero rtt or basertt */
+ u32 vrtt;
+
+ /* Never allow zero rtt or baseRTT */
+ vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
/* Filter to find propagation delay: */
if (vrtt < veno->basertt)
@@ -199,10 +202,11 @@ static u32 tcp_veno_ssthresh(struct sock *sk)
}
static struct tcp_congestion_ops tcp_veno = {
+ .flags = TCP_CONG_RTT_STAMP,
.init = tcp_veno_init,
.ssthresh = tcp_veno_ssthresh,
.cong_avoid = tcp_veno_cong_avoid,
- .rtt_sample = tcp_veno_rtt_calc,
+ .pkts_acked = tcp_veno_pkts_acked,
.set_state = tcp_veno_state,
.cwnd_event = tcp_veno_cwnd_event,
diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c
index ae1026a67720..e61e09dd513e 100644
--- a/net/ipv4/tcp_westwood.c
+++ b/net/ipv4/tcp_westwood.c
@@ -100,7 +100,7 @@ static void westwood_filter(struct westwood *w, u32 delta)
* Called after processing group of packets.
* but all westwood needs is the last sample of srtt.
*/
-static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt)
+static void tcp_westwood_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
{
struct westwood *w = inet_csk_ca(sk);
if (cnt > 0)
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index 46dd1bee583a..81ef02c1649a 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -64,13 +64,15 @@ static void tcp_yeah_init(struct sock *sk)
}
-static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked)
+static void tcp_yeah_pkts_acked(struct sock *sk, u32 pkts_acked, ktime_t last)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct yeah *yeah = inet_csk_ca(sk);
if (icsk->icsk_ca_state == TCP_CA_Open)
yeah->pkts_acked = pkts_acked;
+
+ tcp_vegas_pkts_acked(sk, pkts_acked, last);
}
static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack,
@@ -237,11 +239,11 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) {
}
static struct tcp_congestion_ops tcp_yeah = {
+ .flags = TCP_CONG_RTT_STAMP,
.init = tcp_yeah_init,
.ssthresh = tcp_yeah_ssthresh,
.cong_avoid = tcp_yeah_cong_avoid,
.min_cwnd = tcp_reno_min_cwnd,
- .rtt_sample = tcp_vegas_rtt_calc,
.set_state = tcp_vegas_state,
.cwnd_event = tcp_vegas_cwnd_event,
.get_info = tcp_vegas_get_info,
diff --git a/net/ipv4/tcp_yeah.h b/net/ipv4/tcp_yeah.h
index a62d82038fd0..33ad5385c188 100644
--- a/net/ipv4/tcp_yeah.h
+++ b/net/ipv4/tcp_yeah.h
@@ -81,10 +81,13 @@ static void tcp_vegas_state(struct sock *sk, u8 ca_state)
* o min-filter RTT samples from a much longer window (forever for now)
* to find the propagation delay (baseRTT)
*/
-static void tcp_vegas_rtt_calc(struct sock *sk, u32 usrtt)
+static void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, ktime_t last)
{
struct vegas *vegas = inet_csk_ca(sk);
- u32 vrtt = usrtt + 1; /* Never allow zero rtt or baseRTT */
+ u32 vrtt;
+
+ /* Never allow zero rtt or baseRTT */
+ vrtt = (ktime_to_ns(net_timedelta(last)) / NSEC_PER_USEC) + 1;
/* Filter to find propagation delay: */
if (vrtt < vegas->baseRTT)