cregit-Linux how code gets into the kernel

Release 4.12 include/net/tcp.h

Directory: include/net
/*
 * INET         An implementation of the TCP/IP protocol suite for the LINUX
 *              operating system.  INET is implemented using the  BSD Socket
 *              interface as the means of communication with the user level.
 *
 *              Definitions for the TCP module.
 *
 * Version:     @(#)tcp.h       1.0.5   05/23/93
 *
 * Authors:     Ross Biro
 *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 */
#ifndef _TCP_H

#define _TCP_H


#include <linux/list.h>
#include <linux/tcp.h>
#include <linux/bug.h>
#include <linux/slab.h>
#include <linux/cache.h>
#include <linux/percpu.h>
#include <linux/skbuff.h>
#include <linux/cryptohash.h>
#include <linux/kref.h>
#include <linux/ktime.h>

#include <net/inet_connection_sock.h>
#include <net/inet_timewait_sock.h>
#include <net/inet_hashtables.h>
#include <net/checksum.h>
#include <net/request_sock.h>
#include <net/sock.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/tcp_states.h>
#include <net/inet_ecn.h>
#include <net/dst.h>

#include <linux/seq_file.h>
#include <linux/memcontrol.h>

extern struct inet_hashinfo tcp_hashinfo;

extern struct percpu_counter tcp_orphan_count;
void tcp_time_wait(struct sock *sk, int state, int timeo);



/*
 * Never offer a window over 32767 without using window scaling. Some
 * poor stacks do signed 16bit maths!
 */

#define MAX_TCP_WINDOW		32767U

/* Minimal accepted MSS. It is (60+60+8) - (20+20). */

#define TCP_MIN_MSS		88U

/* The least MTU to use for probing */

#define TCP_BASE_MSS		1024

/* probing interval, default to 10 minutes as per RFC4821 */


/* Specify interval when tcp mtu probing will stop */


/* After receiving this amount of duplicate ACKs fast retransmit starts. */


/* Maximal number of ACKs sent quickly to accelerate slow-start. */


/* Maximal number of window scale according to RFC1323 */

#define TCP_MAX_WSCALE		14U

/* urg_data states */

#define TCP_URG_VALID	0x0100

#define TCP_URG_NOTYET	0x0200

#define TCP_URG_READ	0x0400

#define TCP_RETR1	3	
                                /*
                                 * This is how many retries it does before it
                                 * tries to figure out if the gateway is
                                 * down. Minimal RFC value is 3; it corresponds
                                 * to ~3sec-8min depending on RTO.
                                 */

#define TCP_RETR2	15	
                                /*
                                 * This should take at least
                                 * 90 minutes to time out.
                                 * RFC1122 says that the limit is 100 sec.
                                 * 15 is ~13-30min depending on RTO.
                                 */

#define TCP_SYN_RETRIES	 6	
/* This is how many retries are done
                                 * when active opening a connection.
                                 * RFC1122 says the minimum retry MUST
                                 * be at least 180secs.  Nevertheless
                                 * this value is corresponding to
                                 * 63secs of retransmission with the
                                 * current initial RTO.
                                 */

/* This is how many retries are done
                                 * when passive opening a connection.
                                 * This is corresponding to 31secs of
                                 * retransmission with the current
                                 * initial RTO.
                                 */

#define TCP_TIMEWAIT_LEN (60*HZ) 
/* how long to wait to destroy TIME-WAIT
                                  * state, about 60 seconds     */

                                 /* BSD style FIN_WAIT2 deadlock breaker.
                                  * It used to be 3min, new value is 60sec,
                                  * to combine FIN-WAIT-2 timeout with
                                  * TIME-WAIT timer.
                                  */

#define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */

/*
 * Minimum delayed-ACK time and minimum ATO.
 *
 * With HZ >= 100 both are HZ/25 ticks. For a slower tick rate that
 * quotient becomes too small to be useful, so a fixed 4-tick floor is
 * used instead. The two branches must be mutually exclusive: defining
 * TCP_ATO_MIN in both without an #else is a macro redefinition, and the
 * conditional has to be closed with #endif.
 */
#if HZ >= 100
#define TCP_DELACK_MIN	((unsigned)(HZ/25))	/* minimal time to delay before sending an ACK */
#define TCP_ATO_MIN	((unsigned)(HZ/25))
#else
#define TCP_DELACK_MIN	4U
#define TCP_ATO_MIN	4U
#endif

#define TCP_RTO_MAX	((unsigned)(120*HZ))

#define TCP_RTO_MIN	((unsigned)(HZ/5))

#define TCP_TIMEOUT_INIT ((unsigned)(1*HZ))	
/* RFC6298 2.1 initial RTO value        */

#define TCP_TIMEOUT_FALLBACK ((unsigned)(3*HZ))	
/* RFC 1122 initial RTO value, now
                                                 * used as a fallback RTO for the
                                                 * initial data transmission if no
                                                 * valid RTT sample has been acquired,
                                                 * most likely due to retrans in 3WHS.
                                                 */

#define TCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ/2U)) 
/* Maximal interval between probes
                                                         * for local resources.
                                                         */

#define TCP_REO_TIMEOUT_MIN	(2000) 
/* Min RACK reordering timeout in usec */

#define TCP_KEEPALIVE_TIME	(120*60*HZ)	
/* two hours */

/* Max of 9 keepalive probes    */


#define MAX_TCP_KEEPIDLE	32767

#define MAX_TCP_KEEPINTVL	32767

#define MAX_TCP_KEEPCNT		127

#define MAX_TCP_SYNCNT		127

/* Period of SYNACK timer */

#define TCP_PAWS_24DAYS	(60 * 60 * 24 * 24)

#define TCP_PAWS_MSL	60		
/* Per-host timestamps are invalidated
                                         * after this time. It should be equal
                                         * (or greater than) TCP_TIMEWAIT_LEN
                                         * to provide reliability equal to one
                                         * provided by timewait state.
                                         */

#define TCP_PAWS_WINDOW	1		
/* Replay window for per-host
                                         * timestamps. It must be less than
                                         * minimal timewait lifetime.
                                         */

/*
 *      TCP option
 */

#define TCPOPT_NOP		1	
/* Padding */

#define TCPOPT_EOL		0	
/* End of options */

#define TCPOPT_MSS		2	
/* Segment size negotiating */

#define TCPOPT_WINDOW		3	
/* Window scaling */

#define TCPOPT_SACK_PERM        4       
/* SACK Permitted */

#define TCPOPT_SACK             5       
/* SACK Block */

/* Better RTT estimations/PAWS */

#define TCPOPT_MD5SIG		19	
/* MD5 Signature (RFC2385) */

#define TCPOPT_FASTOPEN		34	
/* Fast open (RFC7413) */

#define TCPOPT_EXP		254	
/* Experimental */
/* Magic number to be after the option value for sharing TCP
 * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
 */


/*
 *     TCP option lengths
 */

#define TCPOLEN_MSS            4

#define TCPOLEN_WINDOW         3

#define TCPOLEN_SACK_PERM      2

#define TCPOLEN_TIMESTAMP      10

#define TCPOLEN_MD5SIG         18



/* But this is what stacks really send out. */









/* Flags in tp->nonagle */

#define TCP_NAGLE_OFF		1	
/* Nagle's algo is disabled */

#define TCP_NAGLE_CORK		2	
/* Socket is corked         */

#define TCP_NAGLE_PUSH		4	
/* Cork is overridden for already queued data */

/* TCP thin-stream limits */

#define TCP_THIN_LINEAR_RETRIES 6       
/* After 6 linear retries, do exp. backoff */

/* TCP initial congestion window as per rfc6928 */

#define TCP_INIT_CWND		10

/* Bit Flags for sysctl_tcp_fastopen */



/* Data in SYN w/o cookie option */

/* Accept SYN data w/o any cookie option */


/* Force enable TFO on all listeners, i.e., not requiring the
 * TCP_FASTOPEN socket option.
 */

#define	TFO_SERVER_WO_SOCKOPT1	0x400

/* sysctl variables for tcp */
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
extern int sysctl_tcp_sack;
extern int sysctl_tcp_fastopen;
extern int sysctl_tcp_retrans_collapse;
extern int sysctl_tcp_stdurg;
extern int sysctl_tcp_rfc1337;
extern int sysctl_tcp_abort_on_overflow;
extern int sysctl_tcp_max_orphans;
extern int sysctl_tcp_fack;
extern int sysctl_tcp_reordering;
extern int sysctl_tcp_max_reordering;
extern int sysctl_tcp_dsack;
extern long sysctl_tcp_mem[3];
extern int sysctl_tcp_wmem[3];
extern int sysctl_tcp_rmem[3];
extern int sysctl_tcp_app_win;
extern int sysctl_tcp_adv_win_scale;
extern int sysctl_tcp_frto;
extern int sysctl_tcp_low_latency;
extern int sysctl_tcp_nometrics_save;
extern int sysctl_tcp_moderate_rcvbuf;
extern int sysctl_tcp_tso_win_divisor;
extern int sysctl_tcp_workaround_signed_windows;
extern int sysctl_tcp_slow_start_after_idle;
extern int sysctl_tcp_thin_linear_timeouts;
extern int sysctl_tcp_thin_dupack;
extern int sysctl_tcp_early_retrans;
extern int sysctl_tcp_recovery;

/* Use RACK to detect losses */

extern int sysctl_tcp_limit_output_bytes;
extern int sysctl_tcp_challenge_ack_limit;
extern int sysctl_tcp_min_tso_segs;
extern int sysctl_tcp_min_rtt_wlen;
extern int sysctl_tcp_autocorking;
extern int sysctl_tcp_invalid_ratelimit;
extern int sysctl_tcp_pacing_ss_ratio;
extern int sysctl_tcp_pacing_ca_ratio;

extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
extern int tcp_memory_pressure;

/* optimized version of sk_under_memory_pressure() for TCP sockets
 *
 * When memcg socket accounting is enabled and @sk has a memcg attached,
 * pressure reported by that memcg is enough to return true. In every
 * other case the result is the global tcp_memory_pressure flag.
 */
static inline bool tcp_under_memory_pressure(const struct sock *sk)
{
	if (!mem_cgroup_sockets_enabled || !sk->sk_memcg)
		return tcp_memory_pressure;

	return mem_cgroup_under_socket_pressure(sk->sk_memcg) ||
	       tcp_memory_pressure;
}


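/*
 * cregit attribution (author | tokens | share | commits | share):
 *   Eric Dumazet    | 25 | 73.53% | 1 | 33.33%
 *   Johannes Weiner |  9 | 26.47% | 2 | 66.67%
 */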

/*
 * The next routines deal with comparing 32 bit unsigned ints
 * and worry about wraparound (automatic with unsigned arithmetic).
 */

/* True when the 32-bit difference seq1 - seq2, reinterpreted as a signed
 * value, is negative.
 */
static inline bool before(__u32 seq1, __u32 seq2)
{
	__s32 delta = (__s32)(seq1 - seq2);

	return delta < 0;
}


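/*
 * cregit attribution (author | tokens | share | commits | share):
 *   Linus Torvalds (pre-git) | 18 | 72.00% | 2 | 33.33%
 *   Gerrit Renker            |  4 | 16.00% | 2 | 33.33%
 *   David S. Miller          |  2 |  8.00% | 1 | 16.67%
 *   Eric Dumazet             |  1 |  4.00% | 1 | 16.67%
 */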

#define after(seq2, seq1) 	before(seq1, seq2)

/* is s2<=s1<=s3 ?
 *
 * Both distances are measured from seq2 in unsigned 32-bit arithmetic, so
 * the comparison keeps working when the range wraps past 2^32.
 */
static inline bool between(__u32 seq1, __u32 seq2, __u32 seq3)
{
	__u32 span = seq3 - seq2;
	__u32 offset = seq1 - seq2;

	return offset <= span;
}


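/*
 * cregit attribution (author | tokens | share | commits | share):
 *   Linus Torvalds (pre-git) | 22 | 88.00% | 4 | 66.67%
 *   David S. Miller          |  2 |  8.00% | 1 | 16.67%
 *   Eric Dumazet             |  1 |  4.00% | 1 | 16.67%
 */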

/* True only when this socket has more than SOCK_MIN_SNDBUF bytes queued for
 * write AND the protocol's allocated memory exceeds its limit at index 2.
 */
static inline bool tcp_out_of_memory(struct sock *sk)
{
	return sk->sk_wmem_queued > SOCK_MIN_SNDBUF &&
	       sk_memory_allocated(sk) > sk_prot_mem_limits(sk, 2);
}


/*
 * cregit attribution (author | tokens | share | commits | share):
 *   Arun Sharma | 38 | 100.00% | 1 | 100.00%
 */

void sk_forced_mem_schedule(struct sock *sk, int size);
/* Compare the orphan count, scaled by << @shift, to sysctl_tcp_max_orphans.
 *
 * The counter is first sampled with percpu_counter_read_positive(). Only if
 * that sample is over the limit is it re-evaluated with
 * percpu_counter_sum_positive(), and that second value decides the result.
 */
static inline bool tcp_too_many_orphans(struct sock *sk, int shift)
{
	struct percpu_counter *counter = sk->sk_prot->orphan_count;
	int count = percpu_counter_read_positive(counter);

	if (count << shift <= sysctl_tcp_max_orphans)
		return false;

	count = percpu_counter_sum_positive(counter);
	return count << shift > sysctl_tcp_max_orphans;
}


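/*
 * cregit attribution (author | tokens | share | commits | share):
 *   David S. Miller | 49 | 75.38% | 1 | 50.00%
 *   Pavel Emelyanov | 16 | 24.62% | 1 | 50.00%
 */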

bool tcp_check_oom(struct sock *sk, int shift); extern struct proto tcp_prot; #define TCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.tcp_statistics, field) #define __TCP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.tcp_statistics, field) #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field) #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val) void tcp_tasklet_init(void); void tcp_v4_err(struct sk_buff *skb, u32); void tcp_shutdown(struct sock *sk, int how); void tcp_v4_early_demux(struct sk_buff *skb); int tcp_v4_rcv(struct sk_buff *skb); int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw); int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size); int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); void tcp_release_cb(struct sock *sk); void tcp_wfree(struct sk_buff *skb); void tcp_write_timer_handler(struct sock *sk); void tcp_delack_timer_handler(struct sock *sk); int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb); void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th, unsigned int len); void tcp_rcv_space_adjust(struct sock *sk); int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); void tcp_twsk_destructor(struct sock *sk); ssize_t tcp_splice_read(struct socket *sk, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags);
/* Consume @pkts from the socket's quick-ACK budget (icsk_ack.quick).
 *
 * Does nothing when the budget is already zero. When @pkts uses up the
 * whole budget, the budget is cleared and icsk_ack.ato is reset to
 * TCP_ATO_MIN; otherwise the budget is simply reduced by @pkts.
 */
static inline void tcp_dec_quickack_mode(struct sock *sk,
					 const unsigned int pkts)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	if (!icsk->icsk_ack.quick)
		return;

	if (pkts < icsk->icsk_ack.quick) {
		icsk->icsk_ack.quick -= pkts;
		return;
	}

	/* Leaving quickack mode we deflate ATO. */
	icsk->icsk_ack.quick = 0;
	icsk->icsk_ack.ato = TCP_ATO_MIN;
}


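/*
 * cregit attribution (author | tokens | share | commits | share):
 *   Linus Torvalds (pre-git) | 27 | 36.00% | 3 | 60.00%
 *   Arnaldo Carvalho de Melo | 24 | 32.00% | 1 | 20.00%
 *   David S. Miller          | 24 | 32.00% | 1 | 20.00%
 */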

#define TCP_ECN_OK 1 #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 #define TCP_ECN_SEEN 8 enum tcp_tw_status { TCP_TW_SUCCESS = 0, TCP_TW_RST = 1, TCP_TW_ACK = 2, TCP_TW_SYN = 3 }; enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th); struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, bool fastopen); int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); void tcp_enter_loss(struct sock *sk); void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked, int flag); void tcp_clear_retrans(struct tcp_sock *tp); void tcp_update_metrics(struct sock *sk); void tcp_init_metrics(struct sock *sk); void tcp_metrics_init(void); bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst); void tcp_disable_fack(struct tcp_sock *tp); void tcp_close(struct sock *sk, long timeout); void tcp_init_sock(struct sock *sk); unsigned int tcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); int compat_tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); int compat_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); void tcp_set_keepalive(struct sock *sk, int val); void tcp_syn_ack_timeout(const struct request_sock *req); int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab, struct tcp_fastopen_cookie *foc); const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* * TCP v4 functions exported for the inet6 API */ void tcp_v4_send_check(struct sock *sk, 
struct sk_buff *skb); void tcp_v4_mtu_reduced(struct sock *sk); void tcp_req_err(struct sock *sk, u32 seq, bool abort); int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(const struct sock *sk, struct request_sock *req, struct sk_buff *skb); void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst); struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, bool *own_req); int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb); int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len); int tcp_connect(struct sock *sk); enum tcp_synack_type { TCP_SYNACK_NORMAL, TCP_SYNACK_FASTOPEN, TCP_SYNACK_COOKIE, }; struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, struct request_sock *req, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type); int tcp_disconnect(struct sock *sk, int flags); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, u32 tsoff); int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES /* Syncookies use a monotonic timer which increments every 60 seconds. * This counter is used both as a hash input and partially encoded into * the cookie value. A cookie is only validated further if the delta * between the current counter value and the encoded one is less than this, * i.e. a sent cookie is valid only at most for 2*60 seconds (or less if * the counter advances immediately after a cookie is generated). 
*/ #define MAX_SYNCOOKIE_AGE 2 #define TCP_SYNCOOKIE_PERIOD (60 * HZ) #define TCP_SYNCOOKIE_VALID (MAX_SYNCOOKIE_AGE * TCP_SYNCOOKIE_PERIOD) /* syncookies: remember time of last synqueue overflow * But do not dirty this field too often (once per second is enough) * It is racy as we do not hold a lock, but race is very minor. */
static inline void tcp_synq_overflow(const struct sock *sk)
{
	unsigned long stamp = tcp_sk(sk)->rx_opt.ts_recent_stamp;
	unsigned long now = jiffies;

	/* Skip the store unless more than HZ ticks passed since the stamp. */
	if (!time_after(now, stamp + HZ))
		return;

	tcp_sk(sk)->rx_opt.ts_recent_stamp = now;
}


/*
 * cregit attribution (author | tokens | share | commits | share):
 *   Eric Dumazet | 54 | 100.00% | 2 | 100.00%
 */

/* syncookies: no recent synqueue overflow on this listening socket?
 *
 * True once jiffies has moved past the stored rx_opt.ts_recent_stamp by
 * more than TCP_SYNCOOKIE_VALID.
 */
static inline bool tcp_synq_no_recent_overflow(const struct sock *sk)
{
	unsigned long stamp = tcp_sk(sk)->rx_opt.ts_recent_stamp;
	unsigned long expiry = stamp + TCP_SYNCOOKIE_VALID;

	return time_after(jiffies, expiry);
}


/*
 * cregit attribution (author | tokens | share | commits | share):
 *   Eric Dumazet | 36 | 100.00% | 1 | 100.00%
 */

/* Current syncookie time counter: the 64-bit jiffies value divided by
 * TCP_SYNCOOKIE_PERIOD, truncated to u32 on return.
 */
static inline u32 tcp_cookie_time(void)
{
	u64 ticks = get_jiffies_64();

	/* do_div() divides in place, leaving the quotient in ticks. */
	do_div(ticks, TCP_SYNCOOKIE_PERIOD);

	return ticks;
}


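/*
 * cregit attribution (author | tokens | share | commits | share):
 *   Florian Westphal | 15 | 60.00% | 1 | 33.33%
 *   Eric Dumazet     | 10 | 40.00% | 2 | 66.67%
 */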

u32 __cookie_v4_init_sequence(const struct iphdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v4_init_sequence(const struct sk_buff *skb, __u16 *mss); __u32 cookie_init_timestamp(struct request_sock *req); bool cookie_timestamp_decode(struct tcp_options_received *opt); bool cookie_ecn_ok(const struct tcp_options_received *opt, const struct net *net, const struct dst_entry *dst); /* From net/ipv6/syncookies.c */ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th, u32 cookie); struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp); __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); bool tcp_may_send_now(struct sock *sk); int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs); void tcp_retransmit_timer(struct sock *sk); void tcp_xmit_retransmit_queue(struct sock *); void tcp_simple_retransmit(struct sock *); void tcp_enter_recovery(struct sock *sk, bool ece_ack); int tcp_trim_head(struct sock *, struct sk_buff *, u32); int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t); void tcp_send_probe0(struct sock *); void tcp_send_partial(struct sock *); int tcp_write_wakeup(struct sock *, int mib); void tcp_send_fin(struct sock *sk); void tcp_send_active_reset(struct sock *sk, gfp_t priority); int tcp_send_synack(struct sock *); void tcp_push_one(struct sock *, unsigned int mss_now); void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); bool tcp_schedule_loss_probe(struct sock *sk); void tcp_skb_collapse_tstamp(struct sk_buff *skb, const struct 
sk_buff *next_skb); /* tcp_input.c */ void tcp_rearm_rto(struct sock *sk); void tcp_synack_rtt_meas(struct sock *sk, struct request_sock *req); void tcp_reset(struct sock *sk); void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb); void tcp_fin(struct sock *sk); /* tcp_timer.c */ void tcp_init_xmit_timers(struct sock *);
/* TCP-named wrapper: hands @sk straight to inet_csk_clear_xmit_timers(). */
static inline void tcp_clear_xmit_timers(struct sock *sk)
{
	inet_csk_clear_xmit_timers(sk);
}


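/*
 * cregit attribution (author | tokens | share | commits | share):
 *   Arnaldo Carvalho de Melo | 10 | 58.82% | 1 | 33.33%
 *   Linus Torvalds (pre-git) |  7 | 41.18% | 2 | 66.67%
 */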

unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu);
unsigned int tcp_current_mss(struct sock *sk);

/* Bound MSS / TSO packet size with the half of the window
 *
 * Returns @pktsize unchanged when it does not exceed the cutoff (or when the
 * cutoff is zero); otherwise returns the larger of the cutoff and
 * 68 - tp->tcp_header_len.
 */
static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
{
	int limit = tp->max_window;

	/* When peer uses tiny windows, there is no use in packetizing
	 * to sub-MSS pieces for the sake of SWS or making sure there
	 * are enough packets in the pipe for fast recovery.
	 *
	 * On the other hand, for extremely large MSS devices, handling
	 * smaller than MSS windows in this way does make sense.
	 */
	if (tp->max_window > TCP_MSS_DEFAULT)
		limit = (tp->max_window >> 1);

	if (!limit || pktsize <= limit)
		return pktsize;

	return max_t(int, limit, 68U - tp->tcp_header_len);
}


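/*
 * cregit attribution (author | tokens | share | commits | share):
 *   Ilpo Järvinen    | 42 | 60.00% | 1 | 33.33%
 *   Alexey Kuznetsov | 26 | 37.14% | 1 | 33.33%
 *   Shane M Seymour  |  2 |  2.86% | 1 | 33.33%
 */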

/* tcp.c */ void tcp_get_info(