cregit-Linux how code gets into the kernel

Release 4.11 net/ipv4/tcp_output.c

Directory: net/ipv4
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Implementation of the Transmission Control Protocol(TCP).
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *              Mark Evans, <evansmp@uhura.aston.ac.uk>
 *              Corey Minyard <wf-rch!minyard@relay.EU.net>
 *              Florian La Roche, <flla@stud.uni-sb.de>
 *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
 *              Linus Torvalds, <torvalds@cs.helsinki.fi>
 *              Alan Cox, <gw4pts@gw4pts.ampr.org>
 *              Matthew Dillon, <dillon@apollo.west.oic.com>
 *              Arnt Gulbrandsen, <agulbra@nvg.unit.no>
 *              Jorge Cwik, <jorge@laser.satlink.net>
 */

/*
 * Changes:     Pedro Roque     :       Retransmit queue handled by TCP.
 *                              :       Fragmentation on mtu decrease
 *                              :       Segment collapse on retransmit
 *                              :       AF independence
 *
 *              Linus Torvalds  :       send_delayed_ack
 *              David S. Miller :       Charge memory using the right skb
 *                                      during syn/ack processing.
 *              David S. Miller :       Output engine completely rewritten.
 *              Andrea Arcangeli:       SYNACK carry ts_recent in tsecr.
 *              Cacophonix Gaul :       draft-minshall-nagle-01
 *              J Hadi Salim    :       ECN support
 *
 */


#define pr_fmt(fmt) "TCP: " fmt

#include <net/tcp.h>

#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>

/* People can turn this off for buggy TCP's found in printers etc. */

int sysctl_tcp_retrans_collapse __read_mostly = 1;

/* People can turn this on to work with those rare, broken TCPs that
 * interpret the window field as a signed quantity.
 */

int sysctl_tcp_workaround_signed_windows __read_mostly = 0;

/* Default TSQ limit of four TSO segments */

int sysctl_tcp_limit_output_bytes __read_mostly = 262144;

/* This limits the percentage of the congestion window which we
 * will allow a single TSO frame to consume.  Building TSO frames
 * which are too large can cause TCP streams to be bursty.
 */

int sysctl_tcp_tso_win_divisor __read_mostly = 3;

/* By default, RFC2861 behavior.  */

int sysctl_tcp_slow_start_after_idle __read_mostly = 1;

static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
			   int push_one, gfp_t gfp);

/* Account for new data that has been sent to the network. */

static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; tp->packets_out += tcp_skb_pcount(skb); if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT, tcp_skb_pcount(skb)); }

Contributors

PersonTokensPropCommitsCommitProp
Ilpo Järvinen5149.51%216.67%
Yuchung Cheng1615.53%216.67%
Nandita Dukkipati1514.56%18.33%
Linus Torvalds (pre-git)1312.62%216.67%
David S. Miller54.85%216.67%
Eric Dumazet21.94%216.67%
Arnaldo Carvalho de Melo10.97%18.33%
Total103100.00%12100.00%

/* SND.NXT, if window was not shrunk or the amount of shrunk was less than one * window scaling factor due to loss of precision. * If window has been shrunk, what should we make? It is not clear at all. * Using SND.UNA we will fail to open window, SND.NXT is out of window. :-( * Anything in between SND.UNA...SND.UNA+SND.WND also can be already * invalid. OK, let's make this for now: */
static inline __u32 tcp_acceptable_seq(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); if (!before(tcp_wnd_end(tp), tp->snd_nxt) || (tp->rx_opt.wscale_ok && ((tp->snd_nxt - tcp_wnd_end(tp)) < (1 << tp->rx_opt.rcv_wscale)))) return tp->snd_nxt; else return tcp_wnd_end(tp); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)3340.24%228.57%
Cui, Cheng3137.80%114.29%
Ilpo Järvinen1417.07%228.57%
Arnaldo Carvalho de Melo22.44%114.29%
Eric Dumazet22.44%114.29%
Total82100.00%7100.00%

/* Calculate mss to advertise in SYN segment. * RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that: * * 1. It is independent of path mtu. * 2. Ideally, it is maximal possible segment size i.e. 65535-40. * 3. For IPv4 it is reasonable to calculate it from maximal MTU of * attached devices, because some buggy hosts are confused by * large MSS. * 4. We do not make 3, we advertise MSS, calculated from first * hop device mtu, but allow to raise it to ip_rt_min_advmss. * This may be overridden via information stored in routing table. * 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible, * probably even Jumbo". */
static __u16 tcp_advertise_mss(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); const struct dst_entry *dst = __sk_dst_get(sk); int mss = tp->advmss; if (dst) { unsigned int metric = dst_metric_advmss(dst); if (metric < mss) { mss = metric; tp->advmss = mss; } } return (__u16)mss; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)5874.36%228.57%
David S. Miller1620.51%228.57%
Alexey Kuznetsov22.56%114.29%
Eric Dumazet11.28%114.29%
Arnaldo Carvalho de Melo11.28%114.29%
Total78100.00%7100.00%

/* RFC2861. Reset CWND after idle period longer RTO to "restart window". * This is the first part of cwnd validation mechanism. */
void tcp_cwnd_restart(struct sock *sk, s32 delta) { struct tcp_sock *tp = tcp_sk(sk); u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk)); u32 cwnd = tp->snd_cwnd; tcp_ca_event(sk, CA_EVENT_CWND_RESTART); tp->snd_ssthresh = tcp_current_ssthresh(sk); restart_cwnd = min(restart_cwnd, cwnd); while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd) cwnd >>= 1; tp->snd_cwnd = max(cwnd, restart_cwnd); tp->snd_cwnd_stamp = tcp_time_stamp; tp->snd_cwnd_used = 0; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)7969.30%112.50%
Arnaldo Carvalho de Melo1916.67%225.00%
Eric Dumazet65.26%112.50%
David S. Miller54.39%225.00%
Stephen Hemminger32.63%112.50%
Linus Torvalds21.75%112.50%
Total114100.00%8100.00%

/* Congestion state accounting after a packet has been sent. */
static void tcp_event_data_sent(struct tcp_sock *tp, struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; if (tcp_packets_in_flight(tp) == 0) tcp_ca_event(sk, CA_EVENT_TX_START); tp->lsndtime = now; /* If it is a reply for ato after last received * packet, enter pingpong mode. */ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato) icsk->icsk_ack.pingpong = 1; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)4755.95%233.33%
Arnaldo Carvalho de Melo1821.43%233.33%
Neal Cardwell1619.05%116.67%
David S. Miller33.57%116.67%
Total84100.00%6100.00%

/* Account for an ACK we sent. */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts) { tcp_dec_quickack_mode(sk, pkts); inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK); }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)2066.67%350.00%
David S. Miller620.00%116.67%
Arnaldo Carvalho de Melo310.00%116.67%
Stephen Hemminger13.33%116.67%
Total30100.00%6100.00%


u32 tcp_default_init_rwnd(u32 mss) { /* Initial receive window should be twice of TCP_INIT_CWND to * enable proper sending of new unsent data during fast recovery * (RFC 3517, Section 4, NextSeg() rule (2)). Further place a * limit when mss is larger than 1460. */ u32 init_rwnd = TCP_INIT_CWND * 2; if (mss > 1460) init_rwnd = max((1460 * init_rwnd) / mss, 2U); return init_rwnd; }

Contributors

PersonTokensPropCommitsCommitProp
Yuchung Cheng3997.50%150.00%
Weiping Pan (潘卫平)12.50%150.00%
Total40100.00%2100.00%

/* Determine a window scaling and initial window to offer. * Based on the assumption that the given amount of space * will be offered. Store the results in the tp structure. * NOTE: for smooth operation initial space offering should * be a multiple of mss if possible. We assume here that mss >= 1. * This MUST be enforced by all callers. */
void tcp_select_initial_window(int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd) { unsigned int space = (__space < 0 ? 0 : __space); /* If no clamp set the clamp to the max possible scaled window */ if (*window_clamp == 0) (*window_clamp) = (65535 << 14); space = min(*window_clamp, space); /* Quantize space offering to a multiple of mss if possible. */ if (space > mss) space = (space / mss) * mss; /* NOTE: offering an initial window larger than 32767 * will break some buggy TCP stacks. If the admin tells us * it is likely we could be speaking with such a buggy stack * we will truncate our initial window offering to 32K-1 * unless the remote has sent us a window scaling option, * which we interpret as a sign the remote TCP is not * misinterpreting the window field as a signed quantity. */ if (sysctl_tcp_workaround_signed_windows) (*rcv_wnd) = min(space, MAX_TCP_WINDOW); else (*rcv_wnd) = space; (*rcv_wscale) = 0; if (wscale_ok) { /* Set window scaling on max possible window * See RFC1323 for an explanation of the limit to 14 */ space = max_t(u32, space, sysctl_tcp_rmem[2]); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); while (space > 65535 && (*rcv_wscale) < 14) { space >>= 1; (*rcv_wscale)++; } } if (mss > (1 << *rcv_wscale)) { if (!init_rcv_wnd) /* Use default unless specified otherwise */ init_rcv_wnd = tcp_default_init_rwnd(mss); *rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss); } /* Set the clamp no higher than max representable value */ (*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp); }

Contributors

PersonTokensPropCommitsCommitProp
Stephen Hemminger20181.05%225.00%
Rick Jones135.24%112.50%
Soheil Hassas Yeganeh114.44%112.50%
Laurent Chavey104.03%112.50%
Nandita Dukkipati72.82%112.50%
Yuchung Cheng52.02%112.50%
Hideaki Yoshifuji / 吉藤英明10.40%112.50%
Total248100.00%8100.00%

EXPORT_SYMBOL(tcp_select_initial_window); /* Chose a new window to advertise, update state in tcp_sock for the * socket, and return result with RFC1323 scaling applied. The return * value can be stuffed directly into th->window for an outgoing * frame. */
static u16 tcp_select_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); u32 old_win = tp->rcv_wnd; u32 cur_win = tcp_receive_window(tp); u32 new_win = __tcp_select_window(sk); /* Never shrink the offered window */ if (new_win < cur_win) { /* Danger Will Robinson! * Don't update rcv_wup/rcv_wnd here or else * we will not be able to advertise a zero * window in time. --DaveM * * Relax Will Robinson. */ if (new_win == 0) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWANTZEROWINDOWADV); new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale); } tp->rcv_wnd = new_win; tp->rcv_wup = tp->rcv_nxt; /* Make sure we do not exceed the maximum possible * scaled window. */ if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); /* RFC1323 scaling applied */ new_win >>= tp->rx_opt.rcv_wscale; /* If we advertise zero window, disable fast path. */ if (new_win == 0) { tp->pred_flags = 0; if (old_win) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTOZEROWINDOWADV); } else if (old_win == 0) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV); } return new_win; }

Contributors

PersonTokensPropCommitsCommitProp
Linus Torvalds (pre-git)8341.92%220.00%
Florian Westphal5829.29%110.00%
David S. Miller3618.18%220.00%
Patrick McHardy115.56%110.00%
Arnaldo Carvalho de Melo73.54%220.00%
Rick Jones21.01%110.00%
Linus Torvalds10.51%110.00%
Total198100.00%10100.00%

/* Packet ECN state for a SYN-ACK */
static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb) { const struct tcp_sock *tp = tcp_sk(sk); TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; if (!(tp->ecn_flags & TCP_ECN_OK)) TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE; else if (tcp_ca_needs_ecn(sk)) INET_ECN_xmit(sk); }

Contributors

PersonTokensPropCommitsCommitProp
Ilpo Järvinen4056.34%120.00%
Daniel Borkmann2636.62%120.00%
Changli Gao22.82%120.00%
Eric Dumazet22.82%120.00%
Florian Westphal11.41%120.00%
Total71100.00%5100.00%

/* Packet ECN state for a SYN. */
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 || tcp_ca_needs_ecn(sk); if (!use_ecn) { const struct dst_entry *dst = __sk_dst_get(sk); if (dst && dst_feature(dst, RTAX_FEATURE_ECN)) use_ecn = true; } tp->ecn_flags = 0; if (use_ecn) { TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR; tp->ecn_flags = TCP_ECN_OK; if (tcp_ca_needs_ecn(sk)) INET_ECN_xmit(sk); } }

Contributors

PersonTokensPropCommitsCommitProp
Florian Westphal4739.50%225.00%
Ilpo Järvinen4537.82%225.00%
Daniel Borkmann1714.29%112.50%
Hannes Frederic Sowa75.88%112.50%
Changli Gao21.68%112.50%
Eric Dumazet10.84%112.50%
Total119100.00%8100.00%


static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb) { if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback) /* tp->ecn_flags are cleared at a later point in time when * SYN ACK is ultimatively being received. */ TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR); }

Contributors

PersonTokensPropCommitsCommitProp
Daniel Borkmann42100.00%1100.00%
Total42100.00%1100.00%


static void tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th) { if (inet_rsk(req)->ecn_ok) th->ece = 1; }

Contributors

PersonTokensPropCommitsCommitProp
Ilpo Järvinen3093.75%133.33%
Florian Westphal13.12%133.33%
Eric Dumazet13.12%133.33%
Total32100.00%3100.00%

/* Set up ECN state for a packet on a ESTABLISHED socket that is about to * be sent. */
static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, int tcp_header_len) { struct tcp_sock *tp = tcp_sk(sk); if (tp->ecn_flags & TCP_ECN_OK) { /* Not-retransmitted data segment: set ECT and inject CWR. */ if (skb->len != tcp_header_len && !before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) { INET_ECN_xmit(sk); if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) { tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR; th->cwr = 1; skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN; } } else if (!tcp_ca_needs_ecn(sk)) { /* ACK or retransmitted segment: clear ECT|CE */ INET_ECN_dontxmit(sk); } if (tp->ecn_flags & TCP_ECN_DEMAND_CWR) th->ece = 1; } }

Contributors

PersonTokensPropCommitsCommitProp
Ilpo Järvinen12187.68%125.00%
Daniel Borkmann96.52%125.00%
Eric Dumazet75.07%125.00%
Florian Westphal10.72%125.00%
Total138100.00%4100.00%

/* Constructs common control bits of non-data skb. If SYN/FIN is present, * auto increment end seqno. */
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; TCP_SKB_CB(skb)->tcp_flags = flags; TCP_SKB_CB(skb)->sacked = 0; tcp_skb_pcount_set(skb, 1); TCP_SKB_CB(skb)->seq = seq; if (flags & (TCPHDR_SYN | TCPHDR_FIN)) seq++; TCP_SKB_CB(skb)->end_seq = seq; }

Contributors

PersonTokensPropCommitsCommitProp
Ilpo Järvinen7183.53%120.00%
David S. Miller67.06%120.00%
Eric Dumazet67.06%240.00%
Changli Gao22.35%120.00%
Total85100.00%5100.00%


static inline bool tcp_urg_mode(const struct tcp_sock *tp) { return tp->snd_una != tp->snd_up; }

Contributors

PersonTokensPropCommitsCommitProp
Ilpo Järvinen2195.45%150.00%
Eric Dumazet14.55%150.00%
Total22100.00%2100.00%

#define OPTION_SACK_ADVERTISE (1 << 0) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) #define OPTION_FAST_OPEN_COOKIE (1 << 8) struct tcp_out_options { u16 options; /* bit field of OPTION_* */ u16 mss; /* 0 to disable */ u8 ws; /* window scale, 0 to disable */ u8 num_sack_blocks; /* number of SACK blocks to include */ u8 hash_size; /* bytes in hash_location */ __u8 *hash_location; /* temporary pointer, overloaded */ __u32 tsval, tsecr; /* need to include OPTION_TS */ struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ }; /* Write previously computed TCP options to the packet. * * Beware: Something in the Internet is very sensitive to the ordering of * TCP options, we learned this through the hard way, so be careful here. * Luckily we can at least blame others for their non-compliance but from * inter-operability perspective it seems that we're somewhat stuck with * the ordering which we have been using if we want to keep working with * those broken things (not that it currently hurts anybody as there isn't * particular reason why the ordering would need to be changed). * * At least SACK_PERM as the first option is known to lead to a disaster * (but it may well be that other scenarios fail similarly). */
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, struct tcp_out_options *opts) { u16 options = opts->options; /* mungable copy */ if (unlikely(OPTION_MD5 & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); /* overload cookie hash location */ opts->hash_location = (__u8 *)ptr; ptr += 4; } if (unlikely(opts->mss)) { *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | opts->mss); } if (likely(OPTION_TS & options)) { if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | (TCPOLEN_SACK_PERM << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); options &= ~OPTION_SACK_ADVERTISE; } else { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); } *ptr++ = htonl(opts->tsval); *ptr++ = htonl(opts->tsecr); } if (unlikely(OPTION_SACK_ADVERTISE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_SACK_PERM << 8) | TCPOLEN_SACK_PERM); } if (unlikely(OPTION_WSCALE & options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | opts->ws); } if (unlikely(opts->num_sack_blocks)) { struct tcp_sack_block *sp = tp->rx_opt.dsack ? tp->duplicate_sack : tp->selective_acks; int this_sack; *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_SACK << 8) | (TCPOLEN_SACK_BASE + (opts->num_sack_blocks * TCPOLEN_SACK_PERBLOCK))); for (this_sack = 0; this_sack < opts->num_sack_blocks; ++this_sack) { *ptr++ = htonl(sp[this_sack].start_seq); *ptr++ = htonl(sp[this_sack].end_seq); } tp->rx_opt.dsack = 0; } if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; u8 *p = (u8 *)ptr; u32 len; /* Fast Open option length */ if (foc->exp) { len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) | TCPOPT_FASTOPEN_MAGIC); p += TCPOLEN_EXP_FASTOPEN_BASE; } else { len = TCPOLEN_FASTOPEN_BASE + foc->len; *p++ = TCPOPT_FASTOPEN; *p++ = len; } memcpy(p, foc->val, foc->len); if ((len & 3) == 2) { p[foc->len] = TCPOPT_NOP; p[foc->len + 1] = TCPOPT_NOP; } ptr += (len + 3) >> 2; } }

Contributors

PersonTokensPropCommitsCommitProp
Adam Langley17831.01%110.00%
Stephen Hemminger15026.13%110.00%
Yuchung Cheng8614.98%110.00%
Daniel Lee6711.67%110.00%
Ilpo Järvinen345.92%110.00%
Hideaki Yoshifuji / 吉藤英明325.57%220.00%
William Allen Simpson234.01%110.00%
Ori Finkelman30.52%110.00%
Al Viro10.17%110.00%
Total574100.00%10100.00%

/* Compute TCP options for SYN packets. This is not the final * network wire format yet. */
static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); unsigned int remaining = MAX_TCP_OPTION_SPACE; struct tcp_fastopen_request *fastopen = tp->fastopen_req; #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); if (*md5) { opts->options |= OPTION_MD5; remaining -= TCPOLEN_MD5SIG_ALIGNED; } #else *md5 = NULL; #endif /* We always get an MSS option. The option bytes which will be seen in * normal data packets should timestamps be used, must be in the MSS * advertised. But we subtract them from tp->mss_cache so that * calculations in tcp_sendmsg are simpler etc. So account for this * fact here if necessary. If we don't do this correctly, as a * receiver we won't recognize data packets as being full sized when we * should, and thus we won't abide by the delayed ACK rules correctly. * SACKs don't matter, we never delay an ACK when we have any of those * going out. */ opts->mss = tcp_advertise_mss(sk); remaining -= TCPOLEN_MSS_ALIGNED; if (likely(sysctl_tcp_timestamps && !*md5)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } if (likely(sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!(OPTION_TS & opts->options))) remaining -= TCPOLEN_SACKPERM_ALIGNED; } if (fastopen && fastopen->cookie.len >= 0) { u32 need = fastopen->cookie.len; need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE : TCPOLEN_FASTOPEN_BASE; need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { opts->options |= OPTION_FAST_OPEN_COOKIE; opts->fastopen_cookie = &fastopen->cookie; remaining -= need; tp->syn_fastopen = 1; tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0; } } return MAX_TCP_OPTION_SPACE - remaining; }

Contributors

PersonTokensPropCommitsCommitProp
Adam Langley17555.21%18.33%
Yuchung Cheng7724.29%18.33%
Daniel Lee268.20%18.33%
William Allen Simpson185.68%18.33%
Stephen Hemminger51.58%18.33%
Andrey Vagin41.26%18.33%
Ori Finkelman41.26%18.33%
Eric Dumazet30.95%216.67%
David S. Miller20.63%18.33%
Philip Love20.63%18.33%
Ian Morris10.32%18.33%
Total317100.00%12100.00%

/* Set up TCP options for SYN-ACKs. */
static unsigned int tcp_synack_options(struct request_sock *req, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, const struct tcp_md5sig_key *md5, struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; #ifdef CONFIG_TCP_MD5SIG if (md5) { opts->options |= OPTION_MD5; remaining -= TCPOLEN_MD5SIG_ALIGNED; /* We can't fit any SACK blocks in a packet with MD5 + TS * options. There was discussion about disabling SACK * rather than TS in order to fit in better with old, * buggy kernels, but that was deemed to be unnecessary. */ ireq->tstamp_ok &= !ireq->sack_ok; } #endif /* We always send an MSS option. */ opts->mss = mss; remaining -= TCPOLEN_MSS_ALIGNED; if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; opts->options |= OPTION_WSCALE; remaining -= TCPOLEN_WSCALE_ALIGNED; } if (likely(ireq->tstamp_ok)) { opts->options |= OPTION_TS; opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off; opts->tsecr = req->ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } if (likely(ireq->sack_ok)) { opts->options |= OPTION_SACK_ADVERTISE; if (unlikely(!ireq->tstamp_ok)) remaining -= TCPOLEN_SACKPERM_ALIGNED; } if (foc != NULL && foc->len >= 0) { u32 need = foc->len; need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE : TCPOLEN_FASTOPEN_BASE; need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { opts->options |= OPTION_FAST_OPEN_COOKIE; opts->fastopen_cookie = foc; remaining -= need; } } return MAX_TCP_OPTION_SPACE - remaining; }

Contributors

PersonTokensPropCommitsCommitProp
Adam Langley14452.94%17.69%
William Allen Simpson5821.32%17.69%
Christoph Paasch207.35%17.69%
Eric Dumazet145.15%430.77%
Daniel Lee124.41%17.69%
Florian Westphal72.57%17.69%
Yuchung Cheng62.21%17.69%
Jerry Chu51.84%17.69%
Ori Finkelman41.47%17.69%
Philip Love20.74%17.69%
Total272100.00%13100.00%

/* Compute TCP options for ESTABLISHED sockets. This is not the * final wire format yet. */
static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5) { struct tcp_sock *tp = tcp_sk(sk); unsigned int size = 0; unsigned int eff_sacks; opts->options = 0; #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); if (unlikely(*md5)) { opts->options |= OPTION_MD5; size += TCPOLEN_MD5SIG_ALIGNED; } #else *md5 = NULL; #endif if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; if (unlikely(eff_sacks)) { const unsigned int remaining