Release 4.11 net/ipv4/tcp_output.c
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Implementation of the Transmission Control Protocol(TCP).
*
* Authors: Ross Biro
* Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
* Mark Evans, <evansmp@uhura.aston.ac.uk>
* Corey Minyard <wf-rch!minyard@relay.EU.net>
* Florian La Roche, <flla@stud.uni-sb.de>
* Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
* Linus Torvalds, <torvalds@cs.helsinki.fi>
* Alan Cox, <gw4pts@gw4pts.ampr.org>
* Matthew Dillon, <dillon@apollo.west.oic.com>
* Arnt Gulbrandsen, <agulbra@nvg.unit.no>
* Jorge Cwik, <jorge@laser.satlink.net>
*/
/*
* Changes: Pedro Roque : Retransmit queue handled by TCP.
* : Fragmentation on mtu decrease
* : Segment collapse on retransmit
* : AF independence
*
* Linus Torvalds : send_delayed_ack
* David S. Miller : Charge memory using the right skb
* during syn/ack processing.
* David S. Miller : Output engine completely rewritten.
* Andrea Arcangeli: SYNACK carry ts_recent in tsecr.
* Cacophonix Gaul : draft-minshall-nagle-01
* J Hadi Salim : ECN support
*
*/
#define pr_fmt(fmt) "TCP: " fmt
#include <net/tcp.h>
#include <linux/compiler.h>
#include <linux/gfp.h>
#include <linux/module.h>
/* People can turn this off for buggy TCPs found in printers etc. */
int sysctl_tcp_retrans_collapse __read_mostly = 1;
/* People can turn this on to work with those rare, broken TCPs that
* interpret the window field as a signed quantity.
*/
int sysctl_tcp_workaround_signed_windows __read_mostly = 0;
/* Default TSQ limit of four TSO segments */
int sysctl_tcp_limit_output_bytes __read_mostly = 262144;
/* This limits the percentage of the congestion window which we
* will allow a single TSO frame to consume. Building TSO frames
* which are too large can cause TCP streams to be bursty.
*/
int sysctl_tcp_tso_win_divisor __read_mostly = 3;
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
int push_one, gfp_t gfp);
/* Account for new data that has been sent to the network. */
static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
unsigned int prior_packets = tp->packets_out;
tcp_advance_send_head(sk, skb);
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
tp->packets_out += tcp_skb_pcount(skb);
if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
tcp_rearm_rto(sk);
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT,
tcp_skb_pcount(skb));
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ilpo Järvinen | 51 | 49.51% | 2 | 16.67% |
Yuchung Cheng | 16 | 15.53% | 2 | 16.67% |
Nandita Dukkipati | 15 | 14.56% | 1 | 8.33% |
Linus Torvalds (pre-git) | 13 | 12.62% | 2 | 16.67% |
David S. Miller | 5 | 4.85% | 2 | 16.67% |
Eric Dumazet | 2 | 1.94% | 2 | 16.67% |
Arnaldo Carvalho de Melo | 1 | 0.97% | 1 | 8.33% |
Total | 103 | 100.00% | 12 | 100.00% |
/* Returns SND.NXT, if the window was not shrunk or if the amount it shrank by
* was less than one window scaling factor, due to loss of precision.
* If the window has been shrunk, what should we do? It is not clear at all.
* Using SND.UNA we would fail to open the window, and SND.NXT is out of the
* window. :-( Anything in between SND.UNA...SND.UNA+SND.WND can also already
* be invalid. OK, let's settle for this for now:
*/
static inline __u32 tcp_acceptable_seq(const struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
if (!before(tcp_wnd_end(tp), tp->snd_nxt) ||
(tp->rx_opt.wscale_ok &&
((tp->snd_nxt - tcp_wnd_end(tp)) < (1 << tp->rx_opt.rcv_wscale))))
return tp->snd_nxt;
else
return tcp_wnd_end(tp);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Linus Torvalds (pre-git) | 33 | 40.24% | 2 | 28.57% |
Cui, Cheng | 31 | 37.80% | 1 | 14.29% |
Ilpo Järvinen | 14 | 17.07% | 2 | 28.57% |
Arnaldo Carvalho de Melo | 2 | 2.44% | 1 | 14.29% |
Eric Dumazet | 2 | 2.44% | 1 | 14.29% |
Total | 82 | 100.00% | 7 | 100.00% |
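The wraparound-safe comparison that tcp_acceptable_seq() leans on is worth making concrete. Below is a small user-space sketch of the before() helper and the resulting acceptable-sequence choice; the helper name mirrors the kernel's, but the program and its sample values are illustrative only.
/* User-space sketch of the wraparound-safe sequence comparison used by
 * tcp_acceptable_seq() via before(). Illustration only, not kernel code.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

static bool before(uint32_t seq1, uint32_t seq2)
{
	/* Signed difference handles 32-bit wraparound: a sequence number
	 * "just past" the end of the space still compares as later.
	 */
	return (int32_t)(seq1 - seq2) < 0;
}

int main(void)
{
	uint32_t wnd_end = 0x00000010;   /* window end, just after wrap */
	uint32_t snd_nxt = 0xfffffff0;   /* SND.NXT, just before wrap   */

	/* snd_nxt is numerically larger, but logically earlier. */
	printf("before(snd_nxt, wnd_end) = %d\n", before(snd_nxt, wnd_end));
	/* As in tcp_acceptable_seq(): use SND.NXT when it is not past the
	 * window end, otherwise fall back to the window end itself. */
	printf("acceptable seq = %#x\n",
	       !before(wnd_end, snd_nxt) ? snd_nxt : wnd_end);
	return 0;
}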
/* Calculate mss to advertise in SYN segment.
* RFC1122, RFC1063, draft-ietf-tcpimpl-pmtud-01 state that:
*
* 1. It is independent of path mtu.
* 2. Ideally, it is maximal possible segment size i.e. 65535-40.
* 3. For IPv4 it is reasonable to calculate it from maximal MTU of
* attached devices, because some buggy hosts are confused by
* large MSS.
* 4. We do not do 3; we advertise an MSS calculated from the first
* hop device mtu, but allow it to be raised to ip_rt_min_advmss.
* This may be overridden via information stored in routing table.
* 5. Value 65535 for MSS is valid in IPv6 and means "as large as possible,
* probably even Jumbo".
*/
static __u16 tcp_advertise_mss(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct dst_entry *dst = __sk_dst_get(sk);
int mss = tp->advmss;
if (dst) {
unsigned int metric = dst_metric_advmss(dst);
if (metric < mss) {
mss = metric;
tp->advmss = mss;
}
}
return (__u16)mss;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Linus Torvalds (pre-git) | 58 | 74.36% | 2 | 28.57% |
David S. Miller | 16 | 20.51% | 2 | 28.57% |
Alexey Kuznetsov | 2 | 2.56% | 1 | 14.29% |
Eric Dumazet | 1 | 1.28% | 1 | 14.29% |
Arnaldo Carvalho de Melo | 1 | 1.28% | 1 | 14.29% |
Total | 78 | 100.00% | 7 | 100.00% |
/* RFC2861. Reset CWND after an idle period longer than RTO to the "restart window".
* This is the first part of the cwnd validation mechanism.
*/
void tcp_cwnd_restart(struct sock *sk, s32 delta)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 restart_cwnd = tcp_init_cwnd(tp, __sk_dst_get(sk));
u32 cwnd = tp->snd_cwnd;
tcp_ca_event(sk, CA_EVENT_CWND_RESTART);
tp->snd_ssthresh = tcp_current_ssthresh(sk);
restart_cwnd = min(restart_cwnd, cwnd);
while ((delta -= inet_csk(sk)->icsk_rto) > 0 && cwnd > restart_cwnd)
cwnd >>= 1;
tp->snd_cwnd = max(cwnd, restart_cwnd);
tp->snd_cwnd_stamp = tcp_time_stamp;
tp->snd_cwnd_used = 0;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Linus Torvalds (pre-git) | 79 | 69.30% | 1 | 12.50% |
Arnaldo Carvalho de Melo | 19 | 16.67% | 2 | 25.00% |
Eric Dumazet | 6 | 5.26% | 1 | 12.50% |
David S. Miller | 5 | 4.39% | 2 | 25.00% |
Stephen Hemminger | 3 | 2.63% | 1 | 12.50% |
Linus Torvalds | 2 | 1.75% | 1 | 12.50% |
Total | 114 | 100.00% | 8 | 100.00% |
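To see how the restart-window arithmetic above plays out, here is a standalone sketch (not kernel code) with made-up sample values: the congestion window is halved once per elapsed RTO of idle time, but never drops below the restart window.
/* Standalone sketch of the RFC 2861 restart arithmetic in tcp_cwnd_restart().
 * The values in main() are made up for illustration.
 */
#include <stdio.h>
#include <stdint.h>

static uint32_t cwnd_after_idle(uint32_t cwnd, uint32_t restart_cwnd,
				int32_t idle, int32_t rto)
{
	restart_cwnd = restart_cwnd < cwnd ? restart_cwnd : cwnd;
	while ((idle -= rto) > 0 && cwnd > restart_cwnd)
		cwnd >>= 1;			/* one halving per elapsed RTO */
	return cwnd > restart_cwnd ? cwnd : restart_cwnd;
}

int main(void)
{
	/* cwnd 40, restart window 10, RTO 200 ms, idle 900 ms: up to four
	 * halvings are possible, but the floor of 10 is reached after two. */
	printf("%u\n", cwnd_after_idle(40, 10, 900, 200));	/* prints 10 */
	return 0;
}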
/* Congestion state accounting after a packet has been sent. */
static void tcp_event_data_sent(struct tcp_sock *tp,
struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const u32 now = tcp_time_stamp;
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
tp->lsndtime = now;
/* If this is a reply within ato of the last received
* packet, enter pingpong mode.
*/
if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
icsk->icsk_ack.pingpong = 1;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Linus Torvalds (pre-git) | 47 | 55.95% | 2 | 33.33% |
Arnaldo Carvalho de Melo | 18 | 21.43% | 2 | 33.33% |
Neal Cardwell | 16 | 19.05% | 1 | 16.67% |
David S. Miller | 3 | 3.57% | 1 | 16.67% |
Total | 84 | 100.00% | 6 | 100.00% |
/* Account for an ACK we sent. */
static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts)
{
tcp_dec_quickack_mode(sk, pkts);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Linus Torvalds (pre-git) | 20 | 66.67% | 3 | 50.00% |
David S. Miller | 6 | 20.00% | 1 | 16.67% |
Arnaldo Carvalho de Melo | 3 | 10.00% | 1 | 16.67% |
Stephen Hemminger | 1 | 3.33% | 1 | 16.67% |
Total | 30 | 100.00% | 6 | 100.00% |
u32 tcp_default_init_rwnd(u32 mss)
{
/* The initial receive window should be twice TCP_INIT_CWND to
* enable proper sending of new unsent data during fast recovery
* (RFC 3517, Section 4, NextSeg() rule (2)). Further, place a
* limit on it when the mss is larger than 1460.
*/
u32 init_rwnd = TCP_INIT_CWND * 2;
if (mss > 1460)
init_rwnd = max((1460 * init_rwnd) / mss, 2U);
return init_rwnd;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Yuchung Cheng | 39 | 97.50% | 1 | 50.00% |
Weiping Pan (潘卫平) | 1 | 2.50% | 1 | 50.00% |
Total | 40 | 100.00% | 2 | 100.00% |
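A quick worked example of that formula, as a user-space sketch using the kernel's TCP_INIT_CWND value of 10 (RFC 6928): a standard 1460-byte MSS yields 20 segments, while a 9000-byte jumbo MSS is scaled back to 3 so the byte count stays comparable.
/* Worked example of the tcp_default_init_rwnd() arithmetic. Not kernel code. */
#include <stdio.h>
#include <stdint.h>

#define TCP_INIT_CWND 10	/* same value the kernel uses (RFC 6928) */

static uint32_t default_init_rwnd(uint32_t mss)
{
	uint32_t init_rwnd = TCP_INIT_CWND * 2;

	if (mss > 1460) {
		init_rwnd = (1460 * init_rwnd) / mss;	/* rescale for jumbo MSS */
		if (init_rwnd < 2)
			init_rwnd = 2;			/* floor of two segments */
	}
	return init_rwnd;
}

int main(void)
{
	printf("mss 1460 -> %u segments\n", default_init_rwnd(1460));	/* 20 */
	printf("mss 9000 -> %u segments\n", default_init_rwnd(9000));	/*  3 */
	return 0;
}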
/* Determine a window scaling and initial window to offer,
* based on the assumption that the given amount of space
* will be offered. Store the results in the tp structure.
* NOTE: for smooth operation initial space offering should
* be a multiple of mss if possible. We assume here that mss >= 1.
* This MUST be enforced by all callers.
*/
void tcp_select_initial_window(int __space, __u32 mss,
__u32 *rcv_wnd, __u32 *window_clamp,
int wscale_ok, __u8 *rcv_wscale,
__u32 init_rcv_wnd)
{
unsigned int space = (__space < 0 ? 0 : __space);
/* If no clamp is set, use the max possible scaled window as the clamp */
if (*window_clamp == 0)
(*window_clamp) = (65535 << 14);
space = min(*window_clamp, space);
/* Quantize space offering to a multiple of mss if possible. */
if (space > mss)
space = (space / mss) * mss;
/* NOTE: offering an initial window larger than 32767
* will break some buggy TCP stacks. If the admin tells us
* it is likely we could be speaking with such a buggy stack
* we will truncate our initial window offering to 32K-1
* unless the remote has sent us a window scaling option,
* which we interpret as a sign the remote TCP is not
* misinterpreting the window field as a signed quantity.
*/
if (sysctl_tcp_workaround_signed_windows)
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
(*rcv_wnd) = space;
(*rcv_wscale) = 0;
if (wscale_ok) {
/* Set window scaling on max possible window
* See RFC1323 for an explanation of the limit to 14
*/
space = max_t(u32, space, sysctl_tcp_rmem[2]);
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
while (space > 65535 && (*rcv_wscale) < 14) {
space >>= 1;
(*rcv_wscale)++;
}
}
if (mss > (1 << *rcv_wscale)) {
if (!init_rcv_wnd) /* Use default unless specified otherwise */
init_rcv_wnd = tcp_default_init_rwnd(mss);
*rcv_wnd = min(*rcv_wnd, init_rcv_wnd * mss);
}
/* Set the clamp no higher than max representable value */
(*window_clamp) = min(65535U << (*rcv_wscale), *window_clamp);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Stephen Hemminger | 201 | 81.05% | 2 | 25.00% |
Rick Jones | 13 | 5.24% | 1 | 12.50% |
Soheil Hassas Yeganeh | 11 | 4.44% | 1 | 12.50% |
Laurent Chavey | 10 | 4.03% | 1 | 12.50% |
Nandita Dukkipati | 7 | 2.82% | 1 | 12.50% |
Yuchung Cheng | 5 | 2.02% | 1 | 12.50% |
Hideaki Yoshifuji / 吉藤英明 | 1 | 0.40% | 1 | 12.50% |
Total | 248 | 100.00% | 8 | 100.00% |
EXPORT_SYMBOL(tcp_select_initial_window);
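The core of the scale selection is the halving loop above: shift the offered space right until it fits the 16-bit window field, capping the shift at 14. A minimal sketch of just that loop, with illustrative inputs:
/* Sketch of the window-scale selection loop in tcp_select_initial_window().
 * Illustration only; the cap of 14 is the RFC 1323/7323 limit.
 */
#include <stdio.h>
#include <stdint.h>

static uint8_t pick_rcv_wscale(uint32_t space)
{
	uint8_t wscale = 0;

	while (space > 65535 && wscale < 14) {
		space >>= 1;
		wscale++;
	}
	return wscale;
}

int main(void)
{
	/* 6 MiB of receive space needs a shift of 7: 6291456 >> 7 = 49152. */
	printf("space 6291456 -> wscale %u\n", (unsigned)pick_rcv_wscale(6291456));
	/* Anything that already fits in 16 bits needs no scaling at all.   */
	printf("space 65535   -> wscale %u\n", (unsigned)pick_rcv_wscale(65535));
	return 0;
}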
/* Choose a new window to advertise, update state in tcp_sock for the
* socket, and return result with RFC1323 scaling applied. The return
* value can be stuffed directly into th->window for an outgoing
* frame.
*/
static u16 tcp_select_window(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 old_win = tp->rcv_wnd;
u32 cur_win = tcp_receive_window(tp);
u32 new_win = __tcp_select_window(sk);
/* Never shrink the offered window */
if (new_win < cur_win) {
/* Danger Will Robinson!
* Don't update rcv_wup/rcv_wnd here or else
* we will not be able to advertise a zero
* window in time. --DaveM
*
* Relax Will Robinson.
*/
if (new_win == 0)
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPWANTZEROWINDOWADV);
new_win = ALIGN(cur_win, 1 << tp->rx_opt.rcv_wscale);
}
tp->rcv_wnd = new_win;
tp->rcv_wup = tp->rcv_nxt;
/* Make sure we do not exceed the maximum possible
* scaled window.
*/
if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows)
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
/* RFC1323 scaling applied */
new_win >>= tp->rx_opt.rcv_wscale;
/* If we advertise zero window, disable fast path. */
if (new_win == 0) {
tp->pred_flags = 0;
if (old_win)
NET_INC_STATS(sock_net(sk),
LINUX_MIB_TCPTOZEROWINDOWADV);
} else if (old_win == 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFROMZEROWINDOWADV);
}
return new_win;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Linus Torvalds (pre-git) | 83 | 41.92% | 2 | 20.00% |
Florian Westphal | 58 | 29.29% | 1 | 10.00% |
David S. Miller | 36 | 18.18% | 2 | 20.00% |
Patrick McHardy | 11 | 5.56% | 1 | 10.00% |
Arnaldo Carvalho de Melo | 7 | 3.54% | 2 | 20.00% |
Rick Jones | 2 | 1.01% | 1 | 10.00% |
Linus Torvalds | 1 | 0.51% | 1 | 10.00% |
Total | 198 | 100.00% | 10 | 100.00% |
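The last two steps above, capping to the maximum representable value and shifting by the receive window scale, are what produce the 16-bit value placed in th->window. A small illustrative sketch, assuming a negotiated scale of 7:
/* Sketch of the final scaling step in tcp_select_window(). Not kernel code. */
#include <stdio.h>
#include <stdint.h>

static uint16_t scale_window(uint32_t new_win, uint8_t rcv_wscale)
{
	uint32_t max_win = 65535U << rcv_wscale;	/* max representable   */

	if (new_win > max_win)
		new_win = max_win;
	return (uint16_t)(new_win >> rcv_wscale);	/* RFC 1323 scaling    */
}

int main(void)
{
	/* With wscale 7, a 1 MiB window goes on the wire as 8192; the peer
	 * multiplies it back up by 1 << 7. */
	printf("%u\n", (unsigned)scale_window(1048576, 7));	/* prints 8192 */
	return 0;
}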
/* Packet ECN state for a SYN-ACK */
static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
{
const struct tcp_sock *tp = tcp_sk(sk);
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
if (!(tp->ecn_flags & TCP_ECN_OK))
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
else if (tcp_ca_needs_ecn(sk))
INET_ECN_xmit(sk);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ilpo Järvinen | 40 | 56.34% | 1 | 20.00% |
Daniel Borkmann | 26 | 36.62% | 1 | 20.00% |
Changli Gao | 2 | 2.82% | 1 | 20.00% |
Eric Dumazet | 2 | 2.82% | 1 | 20.00% |
Florian Westphal | 1 | 1.41% | 1 | 20.00% |
Total | 71 | 100.00% | 5 | 100.00% |
/* Packet ECN state for a SYN. */
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
tcp_ca_needs_ecn(sk);
if (!use_ecn) {
const struct dst_entry *dst = __sk_dst_get(sk);
if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
use_ecn = true;
}
tp->ecn_flags = 0;
if (use_ecn) {
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
tp->ecn_flags = TCP_ECN_OK;
if (tcp_ca_needs_ecn(sk))
INET_ECN_xmit(sk);
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Florian Westphal | 47 | 39.50% | 2 | 25.00% |
Ilpo Järvinen | 45 | 37.82% | 2 | 25.00% |
Daniel Borkmann | 17 | 14.29% | 1 | 12.50% |
Hannes Frederic Sowa | 7 | 5.88% | 1 | 12.50% |
Changli Gao | 2 | 1.68% | 1 | 12.50% |
Eric Dumazet | 1 | 0.84% | 1 | 12.50% |
Total | 119 | 100.00% | 8 | 100.00% |
static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
{
if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
/* tp->ecn_flags are cleared at a later point in time, when
* the SYN-ACK is ultimately received.
*/
TCP_SKB_CB(skb)->tcp_flags &= ~(TCPHDR_ECE | TCPHDR_CWR);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Daniel Borkmann | 42 | 100.00% | 1 | 100.00% |
Total | 42 | 100.00% | 1 | 100.00% |
static void
tcp_ecn_make_synack(const struct request_sock *req, struct tcphdr *th)
{
if (inet_rsk(req)->ecn_ok)
th->ece = 1;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ilpo Järvinen | 30 | 93.75% | 1 | 33.33% |
Florian Westphal | 1 | 3.12% | 1 | 33.33% |
Eric Dumazet | 1 | 3.12% | 1 | 33.33% |
Total | 32 | 100.00% | 3 | 100.00% |
/* Set up ECN state for a packet on an ESTABLISHED socket that is about to
* be sent.
*/
static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb,
struct tcphdr *th, int tcp_header_len)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tp->ecn_flags & TCP_ECN_OK) {
/* Not-retransmitted data segment: set ECT and inject CWR. */
if (skb->len != tcp_header_len &&
!before(TCP_SKB_CB(skb)->seq, tp->snd_nxt)) {
INET_ECN_xmit(sk);
if (tp->ecn_flags & TCP_ECN_QUEUE_CWR) {
tp->ecn_flags &= ~TCP_ECN_QUEUE_CWR;
th->cwr = 1;
skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_ECN;
}
} else if (!tcp_ca_needs_ecn(sk)) {
/* ACK or retransmitted segment: clear ECT|CE */
INET_ECN_dontxmit(sk);
}
if (tp->ecn_flags & TCP_ECN_DEMAND_CWR)
th->ece = 1;
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ilpo Järvinen | 121 | 87.68% | 1 | 25.00% |
Daniel Borkmann | 9 | 6.52% | 1 | 25.00% |
Eric Dumazet | 7 | 5.07% | 1 | 25.00% |
Florian Westphal | 1 | 0.72% | 1 | 25.00% |
Total | 138 | 100.00% | 4 | 100.00% |
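The per-segment decisions above can be summarized outside of kernel structures. The sketch below is illustration only (the flag and function names are local to it): fresh data on an ECN-capable connection is marked ECT, a queued CWR (we reduced cwnd after the peer's ECE) is answered on the next fresh data segment, and a pending demand for CWR (we saw CE on received data) keeps ECE set on every outgoing segment.
/* Summary sketch of the tcp_ecn_send() decisions. Not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct ecn_decision {
	bool set_ect;	/* mark the IP header ECT          */
	bool set_cwr;	/* set CWR in the TCP header       */
	bool set_ece;	/* set ECE in the TCP header       */
};

static struct ecn_decision ecn_for_segment(bool ecn_ok, bool fresh_data,
					   bool cwr_queued, bool demand_cwr)
{
	struct ecn_decision d = { false, false, false };

	if (!ecn_ok)
		return d;
	if (fresh_data) {		/* not a retransmit or bare ACK      */
		d.set_ect = true;
		d.set_cwr = cwr_queued;	/* we cut cwnd; tell the peer (CWR)  */
	}
	d.set_ece = demand_cwr;		/* echo ECE until the peer sends CWR */
	return d;
}

int main(void)
{
	struct ecn_decision d = ecn_for_segment(true, true, true, false);

	printf("ect=%d cwr=%d ece=%d\n", d.set_ect, d.set_cwr, d.set_ece);
	return 0;
}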
/* Constructs common control bits of non-data skb. If SYN/FIN is present,
* auto increment end seqno.
*/
static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
{
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
TCP_SKB_CB(skb)->tcp_flags = flags;
TCP_SKB_CB(skb)->sacked = 0;
tcp_skb_pcount_set(skb, 1);
TCP_SKB_CB(skb)->seq = seq;
if (flags & (TCPHDR_SYN | TCPHDR_FIN))
seq++;
TCP_SKB_CB(skb)->end_seq = seq;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ilpo Järvinen | 71 | 83.53% | 1 | 20.00% |
David S. Miller | 6 | 7.06% | 1 | 20.00% |
Eric Dumazet | 6 | 7.06% | 2 | 40.00% |
Changli Gao | 2 | 2.35% | 1 | 20.00% |
Total | 85 | 100.00% | 5 | 100.00% |
static inline bool tcp_urg_mode(const struct tcp_sock *tp)
{
return tp->snd_una != tp->snd_up;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Ilpo Järvinen | 21 | 95.45% | 1 | 50.00% |
Eric Dumazet | 1 | 4.55% | 1 | 50.00% |
Total | 22 | 100.00% | 2 | 100.00% |
#define OPTION_SACK_ADVERTISE (1 << 0)
#define OPTION_TS (1 << 1)
#define OPTION_MD5 (1 << 2)
#define OPTION_WSCALE (1 << 3)
#define OPTION_FAST_OPEN_COOKIE (1 << 8)
struct tcp_out_options {
u16 options; /* bit field of OPTION_* */
u16 mss; /* 0 to disable */
u8 ws; /* window scale, 0 to disable */
u8 num_sack_blocks; /* number of SACK blocks to include */
u8 hash_size; /* bytes in hash_location */
__u8 *hash_location; /* temporary pointer, overloaded */
__u32 tsval, tsecr; /* need to include OPTION_TS */
struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */
};
/* Write previously computed TCP options to the packet.
*
* Beware: Something in the Internet is very sensitive to the ordering of
* TCP options; we learned this the hard way, so be careful here.
* Luckily we can at least blame others for their non-compliance, but from
* an interoperability perspective it seems that we're somewhat stuck with
* the ordering we have been using if we want to keep working with
* those broken things (not that it currently hurts anybody, as there isn't
* any particular reason why the ordering would need to be changed).
*
* At least SACK_PERM as the first option is known to lead to a disaster
* (but it may well be that other scenarios fail similarly).
*/
static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
struct tcp_out_options *opts)
{
u16 options = opts->options; /* mungable copy */
if (unlikely(OPTION_MD5 & options)) {
*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
/* overload cookie hash location */
opts->hash_location = (__u8 *)ptr;
ptr += 4;
}
if (unlikely(opts->mss)) {
*ptr++ = htonl((TCPOPT_MSS << 24) |
(TCPOLEN_MSS << 16) |
opts->mss);
}
if (likely(OPTION_TS & options)) {
if (unlikely(OPTION_SACK_ADVERTISE & options)) {
*ptr++ = htonl((TCPOPT_SACK_PERM << 24) |
(TCPOLEN_SACK_PERM << 16) |
(TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP);
options &= ~OPTION_SACK_ADVERTISE;
} else {
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_TIMESTAMP << 8) |
TCPOLEN_TIMESTAMP);
}
*ptr++ = htonl(opts->tsval);
*ptr++ = htonl(opts->tsecr);
}
if (unlikely(OPTION_SACK_ADVERTISE & options)) {
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_SACK_PERM << 8) |
TCPOLEN_SACK_PERM);
}
if (unlikely(OPTION_WSCALE & options)) {
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_WINDOW << 16) |
(TCPOLEN_WINDOW << 8) |
opts->ws);
}
if (unlikely(opts->num_sack_blocks)) {
struct tcp_sack_block *sp = tp->rx_opt.dsack ?
tp->duplicate_sack : tp->selective_acks;
int this_sack;
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_NOP << 16) |
(TCPOPT_SACK << 8) |
(TCPOLEN_SACK_BASE + (opts->num_sack_blocks *
TCPOLEN_SACK_PERBLOCK)));
for (this_sack = 0; this_sack < opts->num_sack_blocks;
++this_sack) {
*ptr++ = htonl(sp[this_sack].start_seq);
*ptr++ = htonl(sp[this_sack].end_seq);
}
tp->rx_opt.dsack = 0;
}
if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) {
struct tcp_fastopen_cookie *foc = opts->fastopen_cookie;
u8 *p = (u8 *)ptr;
u32 len; /* Fast Open option length */
if (foc->exp) {
len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
*ptr = htonl((TCPOPT_EXP << 24) | (len << 16) |
TCPOPT_FASTOPEN_MAGIC);
p += TCPOLEN_EXP_FASTOPEN_BASE;
} else {
len = TCPOLEN_FASTOPEN_BASE + foc->len;
*p++ = TCPOPT_FASTOPEN;
*p++ = len;
}
memcpy(p, foc->val, foc->len);
if ((len & 3) == 2) {
p[foc->len] = TCPOPT_NOP;
p[foc->len + 1] = TCPOPT_NOP;
}
ptr += (len + 3) >> 2;
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Adam Langley | 178 | 31.01% | 1 | 10.00% |
Stephen Hemminger | 150 | 26.13% | 1 | 10.00% |
Yuchung Cheng | 86 | 14.98% | 1 | 10.00% |
Daniel Lee | 67 | 11.67% | 1 | 10.00% |
Ilpo Järvinen | 34 | 5.92% | 1 | 10.00% |
Hideaki Yoshifuji / 吉藤英明 | 32 | 5.57% | 2 | 20.00% |
William Allen Simpson | 23 | 4.01% | 1 | 10.00% |
Ori Finkelman | 3 | 0.52% | 1 | 10.00% |
Al Viro | 1 | 0.17% | 1 | 10.00% |
Total | 574 | 100.00% | 10 | 100.00% |
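The Fast Open branch at the end is the only variable-length option written here, so its sizing is worth a worked example. The sketch below (plain C, not kernel code) reproduces the length and padding arithmetic: two NOPs are added whenever the option length falls two bytes short of a 32-bit boundary, and the write pointer then advances by whole 32-bit words.
/* Worked example of the Fast Open option sizing in tcp_options_write(). */
#include <stdio.h>

#define TCPOLEN_FASTOPEN_BASE     2	/* kind + length                 */
#define TCPOLEN_EXP_FASTOPEN_BASE 4	/* kind + length + 16-bit magic  */

static unsigned int fastopen_words(unsigned int cookie_len, int experimental)
{
	unsigned int len = cookie_len +
		(experimental ? TCPOLEN_EXP_FASTOPEN_BASE : TCPOLEN_FASTOPEN_BASE);

	/* Only the (len & 3) == 2 case gets the two-NOP pad in the kernel,
	 * which matches the cookie sizes it deals with in practice. */
	return (len + 3) >> 2;		/* 32-bit words consumed */
}

int main(void)
{
	/* 8-byte cookie, RFC 7413 format: len = 10, padded out to 12 bytes. */
	printf("%u words\n", fastopen_words(8, 0));	/* prints 3 */
	/* Same cookie in the experimental format: len = 12, no padding.    */
	printf("%u words\n", fastopen_words(8, 1));	/* prints 3 */
	return 0;
}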
/* Compute TCP options for SYN packets. This is not the final
* network wire format yet.
*/
static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5)
{
struct tcp_sock *tp = tcp_sk(sk);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
struct tcp_fastopen_request *fastopen = tp->fastopen_req;
#ifdef CONFIG_TCP_MD5SIG
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (*md5) {
opts->options |= OPTION_MD5;
remaining -= TCPOLEN_MD5SIG_ALIGNED;
}
#else
*md5 = NULL;
#endif
/* We always send an MSS option. The option bytes which will be seen in
* normal data packets, should timestamps be used, must be included in the
* advertised MSS. But we subtract them from tp->mss_cache so that
* calculations in tcp_sendmsg are simpler, etc. So account for this
* fact here if necessary. If we don't do this correctly, as a
* receiver we won't recognize data packets as being full sized when we
* should, and thus we won't abide by the delayed ACK rules correctly.
* SACKs don't matter; we never delay an ACK when we have any of those
* going out. */
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
if (likely(sysctl_tcp_timestamps && !*md5)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
if (likely(sysctl_tcp_window_scaling)) {
opts->ws = tp->rx_opt.rcv_wscale;
opts->options |= OPTION_WSCALE;
remaining -= TCPOLEN_WSCALE_ALIGNED;
}
if (likely(sysctl_tcp_sack)) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!(OPTION_TS & opts->options)))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
}
if (fastopen && fastopen->cookie.len >= 0) {
u32 need = fastopen->cookie.len;
need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE :
TCPOLEN_FASTOPEN_BASE;
need = (need + 3) & ~3U; /* Align to 32 bits */
if (remaining >= need) {
opts->options |= OPTION_FAST_OPEN_COOKIE;
opts->fastopen_cookie = &fastopen->cookie;
remaining -= need;
tp->syn_fastopen = 1;
tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0;
}
}
return MAX_TCP_OPTION_SPACE - remaining;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Adam Langley | 175 | 55.21% | 1 | 8.33% |
Yuchung Cheng | 77 | 24.29% | 1 | 8.33% |
Daniel Lee | 26 | 8.20% | 1 | 8.33% |
William Allen Simpson | 18 | 5.68% | 1 | 8.33% |
Stephen Hemminger | 5 | 1.58% | 1 | 8.33% |
Andrey Vagin | 4 | 1.26% | 1 | 8.33% |
Ori Finkelman | 4 | 1.26% | 1 | 8.33% |
Eric Dumazet | 3 | 0.95% | 2 | 16.67% |
David S. Miller | 2 | 0.63% | 1 | 8.33% |
Philip Love | 2 | 0.63% | 1 | 8.33% |
Ian Morris | 1 | 0.32% | 1 | 8.33% |
Total | 317 | 100.00% | 12 | 100.00% |
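The "remaining" bookkeeping above is simple byte accounting against the 40 bytes of TCP option space, with each option charged at its 32-bit aligned size. A worked example (illustration only; the *_ALIGNED constants mirror the kernel's values) for a typical SYN with timestamps, window scaling, and SACK enabled:
/* Worked example of the SYN option-space budget in tcp_syn_options(). */
#include <stdio.h>

#define MAX_TCP_OPTION_SPACE     40
#define TCPOLEN_MSS_ALIGNED       4
#define TCPOLEN_TSTAMP_ALIGNED   12
#define TCPOLEN_WSCALE_ALIGNED    4
#define TCPOLEN_SACKPERM_ALIGNED  4	/* charged only if timestamps are off */

int main(void)
{
	int remaining = MAX_TCP_OPTION_SPACE;

	remaining -= TCPOLEN_MSS_ALIGNED;	/* always sent        */
	remaining -= TCPOLEN_TSTAMP_ALIGNED;	/* timestamps enabled */
	remaining -= TCPOLEN_WSCALE_ALIGNED;	/* window scaling     */
	/* SACK-permitted shares the timestamp word, so no extra charge. */

	/* 20 bytes are left, enough for a 16-byte Fast Open cookie whose
	 * aligned need is (2 + 16 + 3) & ~3 = 20. */
	printf("remaining = %d\n", remaining);	/* prints 20 */
	return 0;
}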
/* Set up TCP options for SYN-ACKs. */
static unsigned int tcp_synack_options(struct request_sock *req,
unsigned int mss, struct sk_buff *skb,
struct tcp_out_options *opts,
const struct tcp_md5sig_key *md5,
struct tcp_fastopen_cookie *foc)
{
struct inet_request_sock *ireq = inet_rsk(req);
unsigned int remaining = MAX_TCP_OPTION_SPACE;
#ifdef CONFIG_TCP_MD5SIG
if (md5) {
opts->options |= OPTION_MD5;
remaining -= TCPOLEN_MD5SIG_ALIGNED;
/* We can't fit any SACK blocks in a packet with MD5 + TS
* options. There was discussion about disabling SACK
* rather than TS in order to fit in better with old,
* buggy kernels, but that was deemed to be unnecessary.
*/
ireq->tstamp_ok &= !ireq->sack_ok;
}
#endif
/* We always send an MSS option. */
opts->mss = mss;
remaining -= TCPOLEN_MSS_ALIGNED;
if (likely(ireq->wscale_ok)) {
opts->ws = ireq->rcv_wscale;
opts->options |= OPTION_WSCALE;
remaining -= TCPOLEN_WSCALE_ALIGNED;
}
if (likely(ireq->tstamp_ok)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
opts->tsecr = req->ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
if (likely(ireq->sack_ok)) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!ireq->tstamp_ok))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
}
if (foc != NULL && foc->len >= 0) {
u32 need = foc->len;
need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE :
TCPOLEN_FASTOPEN_BASE;
need = (need + 3) & ~3U; /* Align to 32 bits */
if (remaining >= need) {
opts->options |= OPTION_FAST_OPEN_COOKIE;
opts->fastopen_cookie = foc;
remaining -= need;
}
}
return MAX_TCP_OPTION_SPACE - remaining;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Adam Langley | 144 | 52.94% | 1 | 7.69% |
William Allen Simpson | 58 | 21.32% | 1 | 7.69% |
Christoph Paasch | 20 | 7.35% | 1 | 7.69% |
Eric Dumazet | 14 | 5.15% | 4 | 30.77% |
Daniel Lee | 12 | 4.41% | 1 | 7.69% |
Florian Westphal | 7 | 2.57% | 1 | 7.69% |
Yuchung Cheng | 6 | 2.21% | 1 | 7.69% |
Jerry Chu | 5 | 1.84% | 1 | 7.69% |
Ori Finkelman | 4 | 1.47% | 1 | 7.69% |
Philip Love | 2 | 0.74% | 1 | 7.69% |
Total | 272 | 100.00% | 13 | 100.00% |
/* Compute TCP options for ESTABLISHED sockets. This is not the
* final wire format yet.
*/
static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb,
struct tcp_out_options *opts,
struct tcp_md5sig_key **md5)
{
struct tcp_sock *tp = tcp_sk(sk);
unsigned int size = 0;
unsigned int eff_sacks;
opts->options = 0;
#ifdef CONFIG_TCP_MD5SIG
*md5 = tp->af_specific->md5_lookup(sk, sk);
if (unlikely(*md5)) {
opts->options |= OPTION_MD5;
size += TCPOLEN_MD5SIG_ALIGNED;
}
#else
*md5 = NULL;
#endif
if (likely(tp->rx_opt.tstamp_ok)) {
opts->options |= OPTION_TS;
opts->tsval = skb ? tcp_skb_timestamp(skb) + tp->tsoffset : 0;
opts->tsecr = tp->rx_opt.ts_recent;
size += TCPOLEN_TSTAMP_ALIGNED;
}
eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack;
if (unlikely(eff_sacks)) {
const unsigned int remaining