diff options
author | Jens Axboe <axboe@kernel.dk> | 2019-04-22 09:47:36 -0600 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2019-04-22 09:47:36 -0600 |
commit | 5c61ee2cd5860e41c8ab98837761ffaa93eb4dfe (patch) | |
tree | 0c78e25f5020eeee47863092ccbb2a3f56bea8a9 /net/ipv4/tcp_dctcp.c | |
parent | cdf3e3deb747d5e193dee617ed37c83060eb576f (diff) | |
parent | 085b7755808aa11f78ab9377257e1dad2e6fa4bb (diff) | |
download | linux-5c61ee2cd5860e41c8ab98837761ffaa93eb4dfe.tar.bz2 |
Merge tag 'v5.1-rc6' into for-5.2/block
Pull in v5.1-rc6 to resolve two conflicts. One is in BFQ, in just a
comment, and is trivial. The other one is a conflict due to a later fix
in the bio multi-page work, and needs a bit more care.
* tag 'v5.1-rc6': (770 commits)
Linux 5.1-rc6
block: make sure that bvec length can't be overflow
block: kill all_q_node in request_queue
x86/cpu/intel: Lower the "ENERGY_PERF_BIAS: Set to normal" message's log priority
coredump: fix race condition between mmget_not_zero()/get_task_mm() and core dumping
mm/kmemleak.c: fix unused-function warning
init: initialize jump labels before command line option parsing
kernel/watchdog_hld.c: hard lockup message should end with a newline
kcov: improve CONFIG_ARCH_HAS_KCOV help text
mm: fix inactive list balancing between NUMA nodes and cgroups
mm/hotplug: treat CMA pages as unmovable
proc: fixup proc-pid-vm test
proc: fix map_files test on F29
mm/vmstat.c: fix /proc/vmstat format for CONFIG_DEBUG_TLBFLUSH=y CONFIG_SMP=n
mm/memory_hotplug: do not unlock after failing to take the device_hotplug_lock
mm: swapoff: shmem_unuse() stop eviction without igrab()
mm: swapoff: take notice of completion sooner
mm: swapoff: remove too limiting SWAP_UNUSE_MAX_TRIES
mm: swapoff: shmem_find_swap_entries() filter out other types
slab: store tagged freelist for off-slab slabmgmt
...
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'net/ipv4/tcp_dctcp.c')
-rw-r--r-- | net/ipv4/tcp_dctcp.c | 81 |
1 files changed, 35 insertions, 46 deletions
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index cd4814f7e962..477cb4aa456c 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -49,9 +49,8 @@ #define DCTCP_MAX_ALPHA 1024U struct dctcp { - u32 acked_bytes_ecn; - u32 acked_bytes_total; - u32 prior_snd_una; + u32 old_delivered; + u32 old_delivered_ce; u32 prior_rcv_nxt; u32 dctcp_alpha; u32 next_seq; @@ -67,19 +66,14 @@ static unsigned int dctcp_alpha_on_init __read_mostly = DCTCP_MAX_ALPHA; module_param(dctcp_alpha_on_init, uint, 0644); MODULE_PARM_DESC(dctcp_alpha_on_init, "parameter for initial alpha value"); -static unsigned int dctcp_clamp_alpha_on_loss __read_mostly; -module_param(dctcp_clamp_alpha_on_loss, uint, 0644); -MODULE_PARM_DESC(dctcp_clamp_alpha_on_loss, - "parameter for clamping alpha on loss"); - static struct tcp_congestion_ops dctcp_reno; static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca) { ca->next_seq = tp->snd_nxt; - ca->acked_bytes_ecn = 0; - ca->acked_bytes_total = 0; + ca->old_delivered = tp->delivered; + ca->old_delivered_ce = tp->delivered_ce; } static void dctcp_init(struct sock *sk) @@ -91,7 +85,6 @@ static void dctcp_init(struct sock *sk) sk->sk_state == TCP_CLOSE)) { struct dctcp *ca = inet_csk_ca(sk); - ca->prior_snd_una = tp->snd_una; ca->prior_rcv_nxt = tp->rcv_nxt; ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA); @@ -123,37 +116,25 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) { const struct tcp_sock *tp = tcp_sk(sk); struct dctcp *ca = inet_csk_ca(sk); - u32 acked_bytes = tp->snd_una - ca->prior_snd_una; - - /* If ack did not advance snd_una, count dupack as MSS size. - * If ack did update window, do not count it at all. - */ - if (acked_bytes == 0 && !(flags & CA_ACK_WIN_UPDATE)) - acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss; - if (acked_bytes) { - ca->acked_bytes_total += acked_bytes; - ca->prior_snd_una = tp->snd_una; - - if (flags & CA_ACK_ECE) - ca->acked_bytes_ecn += acked_bytes; - } /* Expired RTT */ if (!before(tp->snd_una, ca->next_seq)) { - u64 bytes_ecn = ca->acked_bytes_ecn; + u32 delivered_ce = tp->delivered_ce - ca->old_delivered_ce; u32 alpha = ca->dctcp_alpha; /* alpha = (1 - g) * alpha + g * F */ alpha -= min_not_zero(alpha, alpha >> dctcp_shift_g); - if (bytes_ecn) { + if (delivered_ce) { + u32 delivered = tp->delivered - ca->old_delivered; + /* If dctcp_shift_g == 1, a 32bit value would overflow - * after 8 Mbytes. + * after 8 M packets. */ - bytes_ecn <<= (10 - dctcp_shift_g); - do_div(bytes_ecn, max(1U, ca->acked_bytes_total)); + delivered_ce <<= (10 - dctcp_shift_g); + delivered_ce /= max(1U, delivered); - alpha = min(alpha + (u32)bytes_ecn, DCTCP_MAX_ALPHA); + alpha = min(alpha + delivered_ce, DCTCP_MAX_ALPHA); } /* dctcp_alpha can be read from dctcp_get_info() without * synchro, so we ask compiler to not use dctcp_alpha @@ -164,21 +145,23 @@ static void dctcp_update_alpha(struct sock *sk, u32 flags) } } -static void dctcp_state(struct sock *sk, u8 new_state) +static void dctcp_react_to_loss(struct sock *sk) { - if (dctcp_clamp_alpha_on_loss && new_state == TCP_CA_Loss) { - struct dctcp *ca = inet_csk_ca(sk); + struct dctcp *ca = inet_csk_ca(sk); + struct tcp_sock *tp = tcp_sk(sk); - /* If this extension is enabled, we clamp dctcp_alpha to - * max on packet loss; the motivation is that dctcp_alpha - * is an indicator to the extend of congestion and packet - * loss is an indicator of extreme congestion; setting - * this in practice turned out to be beneficial, and - * effectively assumes total congestion which reduces the - * window by half. - */ - ca->dctcp_alpha = DCTCP_MAX_ALPHA; - } + ca->loss_cwnd = tp->snd_cwnd; + tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U); +} + +static void dctcp_state(struct sock *sk, u8 new_state) +{ + if (new_state == TCP_CA_Recovery && + new_state != inet_csk(sk)->icsk_ca_state) + dctcp_react_to_loss(sk); + /* We handle RTO in dctcp_cwnd_event to ensure that we perform only + * one loss-adjustment per RTT. + */ } static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) @@ -190,6 +173,9 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) case CA_EVENT_ECN_NO_CE: dctcp_ece_ack_update(sk, ev, &ca->prior_rcv_nxt, &ca->ce_state); break; + case CA_EVENT_LOSS: + dctcp_react_to_loss(sk); + break; default: /* Don't care for the rest. */ break; @@ -200,6 +186,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { const struct dctcp *ca = inet_csk_ca(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Fill it also in case of VEGASINFO due to req struct limits. * We can still correctly retrieve it later. @@ -211,8 +198,10 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, info->dctcp.dctcp_enabled = 1; info->dctcp.dctcp_ce_state = (u16) ca->ce_state; info->dctcp.dctcp_alpha = ca->dctcp_alpha; - info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn; - info->dctcp.dctcp_ab_tot = ca->acked_bytes_total; + info->dctcp.dctcp_ab_ecn = tp->mss_cache * + (tp->delivered_ce - ca->old_delivered_ce); + info->dctcp.dctcp_ab_tot = tp->mss_cache * + (tp->delivered - ca->old_delivered); } *attr = INET_DIAG_DCTCPINFO; |