diff options
Diffstat (limited to 'net/ipv4/tcp_nv.c')
-rw-r--r-- | net/ipv4/tcp_nv.c | 47 |
1 files changed, 42 insertions, 5 deletions
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index 1ff73982e28c..0b5a05bd82e3 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -39,7 +39,7 @@ * nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected * nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8 * nv_rtt_factor RTT averaging factor - * nv_loss_dec_factor Decrease cwnd by this (50%) when losses occur + * nv_loss_dec_factor Decrease cwnd to this (80%) when losses occur * nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd * nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd * nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping @@ -61,7 +61,7 @@ static int nv_min_cwnd __read_mostly = 2; static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */ static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */ static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */ -static int nv_loss_dec_factor __read_mostly = 512; /* => 50% */ +static int nv_loss_dec_factor __read_mostly = 819; /* => 80% */ static int nv_cwnd_growth_rate_neg __read_mostly = 8; static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */ static int nv_dec_eval_min_calls __read_mostly = 60; @@ -101,6 +101,11 @@ struct tcpnv { u32 nv_last_rtt; /* last rtt */ u32 nv_min_rtt; /* active min rtt. Used to determine slope */ u32 nv_min_rtt_new; /* min rtt for future use */ + u32 nv_base_rtt; /* If non-zero it represents the threshold for + * congestion */ + u32 nv_lower_bound_rtt; /* Used in conjunction with nv_base_rtt. It is + * set to 80% of nv_base_rtt. It helps reduce + * unfairness between flows */ u32 nv_rtt_max_rate; /* max rate seen during current RTT */ u32 nv_rtt_start_seq; /* current RTT ends when packet arrives * acking beyond nv_rtt_start_seq */ @@ -132,9 +137,24 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk) static void tcpnv_init(struct sock *sk) { struct tcpnv *ca = inet_csk_ca(sk); + int base_rtt; tcpnv_reset(ca, sk); + /* See if base_rtt is available from socket_ops bpf program. + * It is meant to be used in environments, such as communication + * within a datacenter, where we have reasonable estimates of + * RTTs + */ + base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT); + if (base_rtt > 0) { + ca->nv_base_rtt = base_rtt; + ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */ + } else { + ca->nv_base_rtt = 0; + ca->nv_lower_bound_rtt = 0; + } + ca->nv_allow_cwnd_growth = 1; ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ; ca->nv_min_rtt = NV_INIT_RTT; @@ -144,6 +164,19 @@ static void tcpnv_init(struct sock *sk) ca->cwnd_growth_factor = 0; } +/* If provided, apply upper (base_rtt) and lower (lower_bound_rtt) + * bounds to RTT. + */ +inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val) +{ + if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt) + return ca->nv_lower_bound_rtt; + else if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt) + return ca->nv_base_rtt; + else + return val; +} + static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -209,7 +242,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) struct tcp_sock *tp = tcp_sk(sk); struct tcpnv *ca = inet_csk_ca(sk); unsigned long now = jiffies; - s64 rate64 = 0; + u64 rate64; u32 rate, max_win, cwnd_by_slope; u32 avg_rtt; u32 bytes_acked = 0; @@ -251,8 +284,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) } /* rate in 100's bits per second */ - rate64 = ((u64)sample->in_flight) * 8000000; - rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100)); + rate64 = ((u64)sample->in_flight) * 80000; + do_div(rate64, avg_rtt ?: 1); + rate = (u32)rate64; /* Remember the maximum rate seen during this RTT * Note: It may be more than one RTT. This function should be @@ -265,6 +299,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) if (ca->nv_eval_call_cnt < 255) ca->nv_eval_call_cnt++; + /* Apply bounds to rtt. Only used to update min_rtt */ + avg_rtt = nv_get_bounded_rtt(ca, avg_rtt); + /* update min rtt if necessary */ if (avg_rtt < ca->nv_min_rtt) ca->nv_min_rtt = avg_rtt; |