[Beowulf] Kernel action relevant to us
Walid
walid.shaari at gmail.com
Fri Aug 13 23:16:19 PDT 2010
Greg,
do we know if that has made it into any Linux kernel?
kind regards
Walid
On 17 December 2009 05:05, Greg Lindahl <lindahl at pbm.com> wrote:
> The following patch, not yet accepted into the kernel, should allow
> local TCP connections to start up faster, while remote ones keep the
> same behavior of slow start.
>
> ----- Forwarded message from chavey at google.com -----
>
> From: chavey at google.com
> Date: Tue, 15 Dec 2009 13:15:28 -0800
> To: davem at davemloft.net
> CC: netdev at vger.kernel.org, therbert at google.com, chavey at google.com,
> eric.dumazet at gmail.com
> Subject: [PATCH] Add rtnetlink init_rcvwnd to set the TCP initial receive
> window
> X-Mailing-List: netdev at vger.kernel.org
>
> Add rtnetlink init_rcvwnd to set the TCP initial receive window size
> advertised by passive and active TCP connections.
> The current Linux TCP implementation limits the advertised TCP initial
> receive window to the one prescribed by slow start. For short-lived
> TCP connections used for transaction-type traffic (e.g. HTTP
> requests), bounding the advertised TCP initial receive window results
> in increased latency to complete the transaction.
> Setting the initial congestion window is already supported
> using rtnetlink init_cwnd, but the feature is useless without the
> ability to set a larger TCP initial receive window.
> The rtnetlink init_rcvwnd allows increasing the TCP initial receive
> window, allowing TCP connections to advertise a larger TCP receive window
> than the one bounded by slow start.
>
> Signed-off-by: Laurent Chavey <chavey at google.com>
> ---
> include/linux/rtnetlink.h | 2 ++
> include/net/dst.h | 2 --
> include/net/tcp.h | 3 ++-
> net/ipv4/syncookies.c | 3 ++-
> net/ipv4/tcp_output.c | 17 +++++++++++++----
> net/ipv6/syncookies.c | 3 ++-
> 6 files changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
> index adf2068..db6f614 100644
> --- a/include/linux/rtnetlink.h
> +++ b/include/linux/rtnetlink.h
> @@ -371,6 +371,8 @@ enum
> #define RTAX_FEATURES RTAX_FEATURES
> RTAX_RTO_MIN,
> #define RTAX_RTO_MIN RTAX_RTO_MIN
> + RTAX_INITRWND,
> +#define RTAX_INITRWND RTAX_INITRWND
> __RTAX_MAX
> };
>
> diff --git a/include/net/dst.h b/include/net/dst.h
> index 5a900dd..6ef812a 100644
> --- a/include/net/dst.h
> +++ b/include/net/dst.h
> @@ -84,8 +84,6 @@ struct dst_entry
> * (L1_CACHE_SIZE would be too much)
> */
> #ifdef CONFIG_64BIT
> - long __pad_to_align_refcnt[2];
> -#else
> long __pad_to_align_refcnt[1];
> #endif
> /*
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 03a49c7..6f95d32 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -972,7 +972,8 @@ static inline void tcp_sack_reset(struct
> tcp_options_received *rx_opt)
> /* Determine a window scaling and initial window to offer. */
> extern void tcp_select_initial_window(int __space, __u32 mss,
> __u32 *rcv_wnd, __u32 *window_clamp,
> - int wscale_ok, __u8 *rcv_wscale);
> + int wscale_ok, __u8 *rcv_wscale,
> + __u32 init_rcv_wnd);
>
> static inline int tcp_win_from_space(int space)
> {
> diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> index a6e0e07..d43173c 100644
> --- a/net/ipv4/syncookies.c
> +++ b/net/ipv4/syncookies.c
> @@ -356,7 +356,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct
> sk_buff *skb,
>
> tcp_select_initial_window(tcp_full_space(sk), req->mss,
> &req->rcv_wnd, &req->window_clamp,
> - ireq->wscale_ok, &rcv_wscale);
> + ireq->wscale_ok, &rcv_wscale,
> + dst_metric(&rt->u.dst, RTAX_INITRWND));
>
> ireq->rcv_wscale = rcv_wscale;
>
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index fcd278a..ee42c75 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -179,7 +179,8 @@ static inline void tcp_event_ack_sent(struct sock *sk,
> unsigned int pkts)
> */
> void tcp_select_initial_window(int __space, __u32 mss,
> __u32 *rcv_wnd, __u32 *window_clamp,
> - int wscale_ok, __u8 *rcv_wscale)
> + int wscale_ok, __u8 *rcv_wscale,
> + __u32 init_rcv_wnd)
> {
> unsigned int space = (__space < 0 ? 0 : __space);
>
> @@ -228,7 +229,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
> init_cwnd = 2;
> else if (mss > 1460)
> init_cwnd = 3;
> - if (*rcv_wnd > init_cwnd * mss)
> + /* when initializing use the value from init_rcv_wnd
> + * rather than the default from above
> + */
> + if (init_rcv_wnd &&
> + (*rcv_wnd > init_rcv_wnd * mss))
> + *rcv_wnd = init_rcv_wnd * mss;
> + else if (*rcv_wnd > init_cwnd * mss)
> *rcv_wnd = init_cwnd * mss;
> }
>
> @@ -2254,7 +2261,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk,
> struct dst_entry *dst,
> &req->rcv_wnd,
> &req->window_clamp,
> ireq->wscale_ok,
> - &rcv_wscale);
> + &rcv_wscale,
> + dst_metric(dst, RTAX_INITRWND));
> ireq->rcv_wscale = rcv_wscale;
> }
>
> @@ -2342,7 +2350,8 @@ static void tcp_connect_init(struct sock *sk)
> &tp->rcv_wnd,
> &tp->window_clamp,
> sysctl_tcp_window_scaling,
> - &rcv_wscale);
> + &rcv_wscale,
> + dst_metric(dst, RTAX_INITRWND));
>
> tp->rx_opt.rcv_wscale = rcv_wscale;
> tp->rcv_ssthresh = tp->rcv_wnd;
> diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
> index 6b6ae91..c8982aa 100644
> --- a/net/ipv6/syncookies.c
> +++ b/net/ipv6/syncookies.c
> @@ -267,7 +267,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct
> sk_buff *skb)
> req->window_clamp = tp->window_clamp ? :dst_metric(dst,
> RTAX_WINDOW);
> tcp_select_initial_window(tcp_full_space(sk), req->mss,
> &req->rcv_wnd, &req->window_clamp,
> - ireq->wscale_ok, &rcv_wscale);
> + ireq->wscale_ok, &rcv_wscale,
> + dst_metric(dst, RTAX_INITRWND));
>
> ireq->rcv_wscale = rcv_wscale;
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
> ----- End forwarded message -----
> _______________________________________________
> Beowulf mailing list, Beowulf at beowulf.org sponsored by Penguin Computing
> To change your subscription (digest mode or unsubscribe) visit
> http://www.beowulf.org/mailman/listinfo/beowulf
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.beowulf.org/pipermail/beowulf/attachments/20100814/1560915f/attachment.html>
More information about the Beowulf mailing list