Revert "tcp: Apply device TSO segment limit earlier"
This reverts commit 9f871e883277cc22c6217db806376dce52401a31, which
was commit 1485348d2424e1131ea42efc033cbd9366462b01 upstream.

It can cause connections to stall when a PMTU event occurs:
Thomas Jarosch reported IPsec TCP stalls in exactly this situation.

In fact the problem was completely unrelated to IPsec. The bug is
also reproducible if you just disable TSO/GSO.

The problem is that when the MSS goes down, existing queued packets
on the TX queue that have not yet been transmitted all look like
TSO packets and get treated as such.
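
To make that concrete, here is a minimal standalone sketch (illustrative
C, not kernel code; the sizes are hypothetical) of the DIV_ROUND_UP-style
segment accounting that makes an already-queued skb "look like" a TSO
packet once the MSS shrinks:

    #include <stdio.h>

    #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

    int main(void)
    {
        unsigned int skb_len = 1448;  /* one full segment at the old MSS */
        unsigned int old_mss = 1448;  /* MSS when the skb was queued */
        unsigned int new_mss = 1400;  /* MSS after the PMTU event */

        /* The segment count is recomputed against the current MSS, so a
         * packet queued as a single segment now counts as two, takes the
         * tso_segs > 1 path in tcp_write_xmit() and gets clamped by
         * sk_gso_max_segs even though it was never a real TSO packet. */
        printf("segs before: %u\n", DIV_ROUND_UP(skb_len, old_mss)); /* 1 */
        printf("segs after:  %u\n", DIV_ROUND_UP(skb_len, new_mss)); /* 2 */
        return 0;
    }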

This was fixed by commit 843925f33fcc ("tcp: Do not apply TSO segment limit to
non-TSO packets") upstream, but that depends on other changes to TSO.

The original issue this fixed was a performance regression for the sfc
driver in extreme cases of TSO (skb with > 100 segments).  This is not
really very important and it seems best to revert it rather than try
to fix it up.

Signed-off-by: Ben Hutchings <[email protected]>
Cc: Herbert Xu <[email protected]>
Cc: [email protected]
Cc: [email protected]
bwhacks authored and andy-padavan committed Oct 9, 2017
1 parent 407a6ea commit cb6d820
Showing 5 changed files with 11 additions and 20 deletions.
2 changes: 0 additions & 2 deletions trunk/linux-3.4.x/include/net/sock.h
@@ -215,7 +215,6 @@ struct cg_proto;
  *	@sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
  *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
  *	@sk_gso_max_size: Maximum GSO segment size to build
- *	@sk_gso_max_segs: Maximum number of GSO segments
  *	@sk_lingertime: %SO_LINGER l_linger setting
  *	@sk_backlog: always used with the per-socket spinlock held
  *	@sk_callback_lock: used with the callbacks in the end of this struct
@@ -331,7 +330,6 @@ struct sock {
 	netdev_features_t	sk_route_nocaps;
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
-	u16			sk_gso_max_segs;
 	int			sk_rcvlowat;
 	unsigned long		sk_lingertime;
 	struct sk_buff_head	sk_error_queue;
1 change: 0 additions & 1 deletion trunk/linux-3.4.x/net/core/sock.c
@@ -1402,7 +1402,6 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 		} else {
 			sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
 			sk->sk_gso_max_size = dst->dev->gso_max_size;
-			sk->sk_gso_max_segs = dst->dev->gso_max_segs;
 		}
 	}
 }
4 changes: 1 addition & 3 deletions trunk/linux-3.4.x/net/ipv4/tcp.c
@@ -744,9 +744,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
 			   old_size_goal + mss_now > xmit_size_goal)) {
 			xmit_size_goal = old_size_goal;
 		} else {
-			tp->xmit_size_goal_segs =
-				min_t(u16, xmit_size_goal / mss_now,
-				      sk->sk_gso_max_segs);
+			tp->xmit_size_goal_segs = xmit_size_goal / mss_now;
 			xmit_size_goal = tp->xmit_size_goal_segs * mss_now;
 		}
 	}
3 changes: 1 addition & 2 deletions trunk/linux-3.4.x/net/ipv4/tcp_cong.c
@@ -291,8 +291,7 @@ int tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
 	left = tp->snd_cwnd - in_flight;
 	if (sk_can_gso(sk) &&
 	    left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
-	    left * tp->mss_cache < sk->sk_gso_max_size &&
-	    left < sk->sk_gso_max_segs)
+	    left * tp->mss_cache < sk->sk_gso_max_size)
 		return 1;
 	return left <= tcp_max_tso_deferred_mss(tp);
 }
21 changes: 9 additions & 12 deletions trunk/linux-3.4.x/net/ipv4/tcp_output.c
@@ -1205,21 +1205,21 @@ static void tcp_cwnd_validate(struct sock *sk)
  * when we would be allowed to send the split-due-to-Nagle skb fully.
  */
 static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb,
-					unsigned int mss_now, unsigned int max_segs)
+					unsigned int mss_now, unsigned int cwnd)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
-	u32 needed, window, max_len;
+	u32 needed, window, cwnd_len;
 
 	window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq;
-	max_len = mss_now * max_segs;
+	cwnd_len = mss_now * cwnd;
 
-	if (likely(max_len <= window && skb != tcp_write_queue_tail(sk)))
-		return max_len;
+	if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
+		return cwnd_len;
 
 	needed = min(skb->len, window);
 
-	if (max_len <= needed)
-		return max_len;
+	if (cwnd_len <= needed)
+		return cwnd_len;
 
 	return needed - needed % mss_now;
 }
@@ -1448,8 +1448,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
 	limit = min(send_win, cong_win);
 
 	/* If a full-sized TSO skb can be sent, do it. */
-	if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
-			   sk->sk_gso_max_segs * tp->mss_cache))
+	if (limit >= sk->sk_gso_max_size)
 		goto send_now;
 
 	/* Middle in queue won't get any more data, full sendable already? */
@@ -1675,9 +1674,7 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
 		limit = mss_now;
 		if (tso_segs > 1 && !tcp_urg_mode(tp))
 			limit = tcp_mss_split_point(sk, skb, mss_now,
-						    min_t(unsigned int,
-							  cwnd_quota,
-							  sk->sk_gso_max_segs));
+						    cwnd_quota);
 
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now, gfp)))
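For a sense of the magnitudes involved in the last hunk (hypothetical
numbers): with mss_now = 1448 and cwnd_quota = 10, the restored
tcp_mss_split_point() call computes its split point from
10 * 1448 = 14480 bytes, whereas the reverted code would first clamp the
segment count to sk_gso_max_segs, e.g. min(10, 8) = 8 segments = 11584
bytes, and applied that clamp even to packets that were never real TSO
packets.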
