Archives


- Beowulf
- Beowulf Announce
- Scyld-users
- Beowulf on Debian

[Beowulf] Kernel action relevant to us

Many of your questions may have already been answered in earlier discussions or in the FAQ. The search results page will indicate current discussions as well as past list serves, articles, and papers.

Search

Walid walid.shaari at gmail.com
Fri Aug 13 23:16:19 PDT 2010


Greg,

Do we know if that has made it into any Linux kernel?

kind regards

Walid

On 17 December 2009 05:05, Greg Lindahl <lindahl at pbm.com> wrote:

> The following patch, not yet accepted into the kernel, should allow
> local TCP connections to start up faster, while remote ones keep the
> same behavior of slow start.
>
> ----- Forwarded message from chavey at google.com -----
>
> From: chavey at google.com
> Date: Tue, 15 Dec 2009 13:15:28 -0800
> To: davem at davemloft.net
> CC: netdev at vger.kernel.org, therbert at google.com, chavey at google.com,
>        eric.dumazet at gmail.com
> Subject: [PATCH] Add rtnetlink init_rcvwnd to set the TCP initial receive
> window
> X-Mailing-List: netdev at vger.kernel.org
>
> Add rtnetlink init_rcvwnd to set the TCP initial receive window size
> advertised by passive and active TCP connections.
> The current Linux TCP implementation limits the advertised TCP initial
> receive window to the one prescribed by slow start. For short-lived
> TCP connections used for transaction-type traffic (e.g. HTTP
> requests), bounding the advertised TCP initial receive window results
> in increased latency to complete the transaction.
> Setting the initial congestion window is already supported
> using rtnetlink init_cwnd, but the feature is useless without the
> ability to set a larger TCP initial receive window.
> The rtnetlink init_rcvwnd allows increasing the TCP initial receive
> window, allowing TCP connections to advertise a larger TCP receive
> window than the one bounded by slow start.
>
> Signed-off-by: Laurent Chavey <chavey at google.com>
> ---
>  include/linux/rtnetlink.h |    2 ++
>  include/net/dst.h         |    2 --
>  include/net/tcp.h         |    3 ++-
>  net/ipv4/syncookies.c     |    3 ++-
>  net/ipv4/tcp_output.c     |   17 +++++++++++++----
>  net/ipv6/syncookies.c     |    3 ++-
>  6 files changed, 21 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
> index adf2068..db6f614 100644
> --- a/include/linux/rtnetlink.h
> +++ b/include/linux/rtnetlink.h
> @@ -371,6 +371,8 @@ enum
>  #define RTAX_FEATURES RTAX_FEATURES
>        RTAX_RTO_MIN,
>  #define RTAX_RTO_MIN RTAX_RTO_MIN
> +       RTAX_INITRWND,
> +#define RTAX_INITRWND RTAX_INITRWND
>        __RTAX_MAX
>  };
>
> diff --git a/include/net/dst.h b/include/net/dst.h
> index 5a900dd..6ef812a 100644
> --- a/include/net/dst.h
> +++ b/include/net/dst.h
> @@ -84,8 +84,6 @@ struct dst_entry
>         * (L1_CACHE_SIZE would be too much)
>         */
>  #ifdef CONFIG_64BIT
> -       long                    __pad_to_align_refcnt[2];
> -#else
>        long                    __pad_to_align_refcnt[1];
>  #endif
>        /*
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index 03a49c7..6f95d32 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -972,7 +972,8 @@ static inline void tcp_sack_reset(struct
> tcp_options_received *rx_opt)
>  /* Determine a window scaling and initial window to offer. */
>  extern void tcp_select_initial_window(int __space, __u32 mss,
>                                      __u32 *rcv_wnd, __u32 *window_clamp,
> -                                     int wscale_ok, __u8 *rcv_wscale);
> +                                     int wscale_ok, __u8 *rcv_wscale,
> +                                     __u32 init_rcv_wnd);
>
>  static inline int tcp_win_from_space(int space)
>  {
> diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
> index a6e0e07..d43173c 100644
> --- a/net/ipv4/syncookies.c
> +++ b/net/ipv4/syncookies.c
> @@ -356,7 +356,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct
> sk_buff *skb,
>
>        tcp_select_initial_window(tcp_full_space(sk), req->mss,
>                                  &req->rcv_wnd, &req->window_clamp,
> -                                 ireq->wscale_ok, &rcv_wscale);
> +                                 ireq->wscale_ok, &rcv_wscale,
> +                                 dst_metric(&rt->u.dst, RTAX_INITRWND));
>
>        ireq->rcv_wscale  = rcv_wscale;
>
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index fcd278a..ee42c75 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -179,7 +179,8 @@ static inline void tcp_event_ack_sent(struct sock *sk,
> unsigned int pkts)
>  */
>  void tcp_select_initial_window(int __space, __u32 mss,
>                               __u32 *rcv_wnd, __u32 *window_clamp,
> -                              int wscale_ok, __u8 *rcv_wscale)
> +                              int wscale_ok, __u8 *rcv_wscale,
> +                              __u32 init_rcv_wnd)
>  {
>        unsigned int space = (__space < 0 ? 0 : __space);
>
> @@ -228,7 +229,13 @@ void tcp_select_initial_window(int __space, __u32 mss,
>                        init_cwnd = 2;
>                else if (mss > 1460)
>                        init_cwnd = 3;
> -               if (*rcv_wnd > init_cwnd * mss)
> +               /* when initializing use the value from init_rcv_wnd
> +                * rather than the default from above
> +                */
> +               if (init_rcv_wnd &&
> +                   (*rcv_wnd > init_rcv_wnd * mss))
> +                       *rcv_wnd = init_rcv_wnd * mss;
> +               else if (*rcv_wnd > init_cwnd * mss)
>                        *rcv_wnd = init_cwnd * mss;
>        }
>
> @@ -2254,7 +2261,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk,
> struct dst_entry *dst,
>                        &req->rcv_wnd,
>                        &req->window_clamp,
>                        ireq->wscale_ok,
> -                       &rcv_wscale);
> +                       &rcv_wscale,
> +                       dst_metric(dst, RTAX_INITRWND));
>                ireq->rcv_wscale = rcv_wscale;
>        }
>
> @@ -2342,7 +2350,8 @@ static void tcp_connect_init(struct sock *sk)
>                                  &tp->rcv_wnd,
>                                  &tp->window_clamp,
>                                  sysctl_tcp_window_scaling,
> -                                 &rcv_wscale);
> +                                 &rcv_wscale,
> +                                 dst_metric(dst, RTAX_INITRWND));
>
>        tp->rx_opt.rcv_wscale = rcv_wscale;
>        tp->rcv_ssthresh = tp->rcv_wnd;
> diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
> index 6b6ae91..c8982aa 100644
> --- a/net/ipv6/syncookies.c
> +++ b/net/ipv6/syncookies.c
> @@ -267,7 +267,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct
> sk_buff *skb)
>        req->window_clamp = tp->window_clamp ? :dst_metric(dst,
> RTAX_WINDOW);
>        tcp_select_initial_window(tcp_full_space(sk), req->mss,
>                                  &req->rcv_wnd, &req->window_clamp,
> -                                 ireq->wscale_ok, &rcv_wscale);
> +                                 ireq->wscale_ok, &rcv_wscale,
> +                                 dst_metric(dst, RTAX_INITRWND));
>
>        ireq->rcv_wscale = rcv_wscale;
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
> ----- End forwarded message -----
> _______________________________________________
> Beowulf mailing list, Beowulf at beowulf.org sponsored by Penguin Computing
> To change your subscription (digest mode or unsubscribe) visit
> http://www.beowulf.org/mailman/listinfo/beowulf
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://www.scyld.com/pipermail/beowulf/attachments/20100814/1560915f/attachment.html


More information about the Beowulf mailing list