cregit-Linux how code gets into the kernel

Release 4.11 net/netfilter/nf_conntrack_proto_tcp.c

Directory: net/netfilter
/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
 * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
#include <asm/unaligned.h>

#include <net/tcp.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_log.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>

/* "Be conservative in what you do,
    be liberal in what you accept from others."
    If it's non-zero, we mark only out of window RST segments as INVALID.
    Initial value: 0. */

static int nf_ct_tcp_be_liberal __read_mostly = 0;

/* If it is set to zero, we disable picking up already established
   connections. Initial value: 1 (pick-up enabled). */

static int nf_ct_tcp_loose __read_mostly = 1;

/* Max number of the retransmitted packets without receiving an (acceptable)
   ACK from the destination. If this number is reached, a shorter timer
   will be started. Initial value: 3. */

static int nf_ct_tcp_max_retrans __read_mostly = 3;

  /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
     closely.  They're more complex. --RR */


/*
 * Printable names of the conntrack TCP states, indexed by state value.
 * tcp_print_conntrack() looks entries up with ct->proto.tcp.state, so the
 * order here has to follow the numeric order of the state constants
 * (the same order as the sNO..sS2 columns of the transition table).
 */
static const char *const tcp_conntrack_names[] = {
	"NONE",
	"SYN_SENT",
	"SYN_RECV",
	"ESTABLISHED",
	"FIN_WAIT",
	"CLOSE_WAIT",
	"LAST_ACK",
	"TIME_WAIT",
	"CLOSE",
	"SYN_SENT2",
};


/*
 * Postfix time-unit macros: they are written *after* a number, so that
 * "2 MINS" expands to "2 * 60 * HZ". Each unit is defined in terms of the
 * next smaller one. Only use them directly behind a plain constant - the
 * expansion is not parenthesized.
 */
#define SECS * HZ

#define MINS * 60 SECS

#define HOURS * 60 MINS

#define DAYS * 24 HOURS


/*
 * Default timeout for each conntrack TCP state, plus the two extra
 * RETRANS and UNACK slots. Values are built with the postfix SECS/MINS/DAYS
 * macros, i.e. they are multiples of HZ.
 */
static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
	[TCP_CONNTRACK_ESTABLISHED]	= 5 DAYS,
	[TCP_CONNTRACK_FIN_WAIT]	= 2 MINS,
	[TCP_CONNTRACK_CLOSE_WAIT]	= 60 SECS,
	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
	[TCP_CONNTRACK_SYN_SENT2]	= 2 MINS,
/* RFC1122 says the R2 limit should be at least 100 seconds.
   Linux uses 15 packets as limit, which corresponds
   to ~13-30min depending on RTO. */
	[TCP_CONNTRACK_RETRANS]		= 5 MINS,
	[TCP_CONNTRACK_UNACK]		= 5 MINS,
};


#define sNO TCP_CONNTRACK_NONE

#define sSS TCP_CONNTRACK_SYN_SENT

#define sSR TCP_CONNTRACK_SYN_RECV

#define sES TCP_CONNTRACK_ESTABLISHED

#define sFW TCP_CONNTRACK_FIN_WAIT

#define sCW TCP_CONNTRACK_CLOSE_WAIT

#define sLA TCP_CONNTRACK_LAST_ACK

#define sTW TCP_CONNTRACK_TIME_WAIT

#define sCL TCP_CONNTRACK_CLOSE

#define sS2 TCP_CONNTRACK_SYN_SENT2

#define sIV TCP_CONNTRACK_MAX

#define sIG TCP_CONNTRACK_IGNORE

/* What TCP flags are set from RST/SYN/FIN/ACK. */

/*
 * Flag class of a TCP segment as computed by get_conntrack_index().
 * The numeric values are row indexes into the state transition table,
 * so they must stay dense and in this order.
 */
enum tcp_bit_set {
	TCP_SYN_SET	= 0,	/* SYN set, ACK clear (and no RST) */
	TCP_SYNACK_SET	= 1,	/* SYN and ACK set (and no RST) */
	TCP_FIN_SET	= 2,	/* FIN set (no RST, no SYN) */
	TCP_ACK_SET	= 3,	/* ACK set (no RST, SYN or FIN) */
	TCP_RST_SET	= 4,	/* RST set, regardless of other flags */
	TCP_NONE_SET	= 5,	/* none of RST/SYN/FIN/ACK set */
};

/*
 * The TCP state transition table needs a few words...
 *
 * We are the man in the middle. All the packets go through us
 * but might get lost in transit to the destination.
 * It is assumed that the destinations can't receive segments
 * we haven't seen.
 *
 * The checked segment is in window, but our windows are *not*
 * equivalent with the ones of the sender/receiver. We always
 * try to guess the state of the current sender.
 *
 * The meaning of the states are:
 *
 * NONE:        initial state
 * SYN_SENT:    SYN-only packet seen
 * SYN_SENT2:   SYN-only packet seen from reply dir, simultaneous open
 * SYN_RECV:    SYN-ACK packet seen
 * ESTABLISHED: ACK packet seen
 * FIN_WAIT:    FIN packet seen
 * CLOSE_WAIT:  ACK seen (after FIN)
 * LAST_ACK:    FIN seen (after FIN)
 * TIME_WAIT:   last ACK seen
 * CLOSE:       closed connection (RST)
 *
 * Packets marked as IGNORED (sIG):
 *      if they may be either invalid or valid
 *      and the receiver may send back a connection
 *      closing RST or a SYN/ACK.
 *
 * Packets marked as INVALID (sIV):
 *      if we regard them as truly invalid packets
 */

/*
 * State transition table, indexed as
 *	[direction: 0 = ORIGINAL, 1 = REPLY]
 *	[flag class of the segment, in enum tcp_bit_set order: syn..none]
 *	[current state, sNO..sS2]
 * Each entry is the resulting state, or sIV (invalid) / sIG (ignore).
 */
static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
	{
/* ORIGINAL */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
/*
 *      sNO -> sSS      Initialize a new connection
 *      sSS -> sSS      Retransmitted SYN
 *      sS2 -> sS2      Late retransmitted SYN
 *      sSR -> sIG
 *      sES -> sIG      Error: SYNs in window outside the SYN_SENT state
 *                      are errors. Receiver will reply with RST
 *                      and close the connection.
 *                      Or we are not in sync and hold a dead connection.
 *      sFW -> sIG
 *      sCW -> sIG
 *      sLA -> sIG
 *      sTW -> sSS      Reopened connection (RFC 1122).
 *      sCL -> sSS
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
/*
 *      sNO -> sIV      Too late and no reason to do anything
 *      sSS -> sIV      Client can't send SYN and then SYN/ACK
 *      sS2 -> sSR      SYN/ACK sent to SYN2 in simultaneous open
 *      sSR -> sSR      Late retransmitted SYN/ACK in simultaneous open
 *      sES -> sIV      Invalid SYN/ACK packets sent by the client
 *      sFW -> sIV
 *      sCW -> sIV
 *      sLA -> sIV
 *      sTW -> sIV
 *      sCL -> sIV
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *      sNO -> sIV      Too late and no reason to do anything...
 *      sSS -> sIV      Client might not send FIN in this state:
 *                      we enforce waiting for a SYN/ACK reply first.
 *      sS2 -> sIV
 *      sSR -> sFW      Close started.
 *      sES -> sFW
 *      sFW -> sLA      FIN seen in both directions, waiting for
 *                      the last ACK.
 *                      Might be a retransmitted FIN as well...
 *      sCW -> sLA
 *      sLA -> sLA      Retransmitted FIN. Remain in the same state.
 *      sTW -> sTW
 *      sCL -> sCL
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
 *      sNO -> sES      Assumed.
 *      sSS -> sIV      ACK is invalid: we haven't seen a SYN/ACK yet.
 *      sS2 -> sIV
 *      sSR -> sES      Established state is reached.
 *      sES -> sES      :-)
 *      sFW -> sCW      Normal close request answered by ACK.
 *      sCW -> sCW
 *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
 *      sTW -> sTW      Retransmitted last ACK. Remain in the same state.
 *      sCL -> sCL
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	},
	{
/* REPLY */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
/*
 *      sNO -> sIV      Never reached.
 *      sSS -> sS2      Simultaneous open
 *      sS2 -> sS2      Retransmitted simultaneous SYN
 *      sSR -> sIV      Invalid SYN packets sent by the server
 *      sES -> sIV
 *      sFW -> sIV
 *      sCW -> sIV
 *      sLA -> sIV
 *      sTW -> sSS      Reopened connection, but server may have switched role
 *      sCL -> sIV
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
 *      sSS -> sSR      Standard open.
 *      sS2 -> sSR      Simultaneous open
 *      sSR -> sIG      Retransmitted SYN/ACK, ignore it.
 *      sES -> sIG      Late retransmitted SYN/ACK?
 *      sFW -> sIG      Might be SYN/ACK answering ignored SYN
 *      sCW -> sIG
 *      sLA -> sIG
 *      sTW -> sIG
 *      sCL -> sIG
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
 *      sSS -> sIV      Server might not send FIN in this state.
 *      sS2 -> sIV
 *      sSR -> sFW      Close started.
 *      sES -> sFW
 *      sFW -> sLA      FIN seen in both directions.
 *      sCW -> sLA
 *      sLA -> sLA      Retransmitted FIN.
 *      sTW -> sTW
 *      sCL -> sCL
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
/*
 *      sSS -> sIG      Might be a half-open connection.
 *      sS2 -> sIG
 *      sSR -> sSR      Might answer late resent SYN.
 *      sES -> sES      :-)
 *      sFW -> sCW      Normal close request answered by ACK.
 *      sCW -> sCW
 *      sLA -> sTW      Last ACK detected (RFC5961 challenged)
 *      sTW -> sTW      Retransmitted last ACK.
 *      sCL -> sCL
 */
/*           sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2   */
/*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
	}
};


/* Return the TCP conntrack data embedded in the network namespace @net. */
static inline struct nf_tcp_net *tcp_pernet(struct net *net)
{
	struct nf_tcp_net *tn = &net->ct.nf_ct_proto.tcp;

	return tn;
}

Contributors

PersonTokensPropCommitsCommitProp
Gao Feng24100.00%1100.00%
Total24100.00%1100.00%


static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, struct nf_conntrack_tuple *tuple) { const struct tcphdr *hp; struct tcphdr _hdr; /* Actually only need first 4 bytes to get ports. */ hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; tuple->src.u.tcp.port = hp->source; tuple->dst.u.tcp.port = hp->dest; return true; }

Contributors

PersonTokensPropCommitsCommitProp
Yasuyuki Kozakai7683.52%120.00%
Jan Engelhardt88.79%240.00%
Eric W. Biedermann55.49%120.00%
Gao Feng22.20%120.00%
Total91100.00%5100.00%


/*
 * Build the TCP part of the reverse-direction tuple: the source port of
 * @tuple becomes the destination port of @orig and vice versa.
 * Always returns true.
 */
static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
			     const struct nf_conntrack_tuple *orig)
{
	/* Keep this store order; it is what callers have always seen. */
	tuple->src.u.tcp.port = orig->dst.u.tcp.port;
	tuple->dst.u.tcp.port = orig->src.u.tcp.port;

	return true;
}

Contributors

PersonTokensPropCommitsCommitProp
Yasuyuki Kozakai5896.67%150.00%
Jan Engelhardt23.33%150.00%
Total60100.00%2100.00%

/* Print out the per-protocol part of the tuple. */
/*
 * Emit "sport=<n> dport=<n> " for @tuple into seq file @s, with both
 * ports converted to host byte order.
 */
static void tcp_print_tuple(struct seq_file *s,
			    const struct nf_conntrack_tuple *tuple)
{
	unsigned short sport = ntohs(tuple->src.u.tcp.port);
	unsigned short dport = ntohs(tuple->dst.u.tcp.port);

	seq_printf(s, "sport=%hu dport=%hu ", sport, dport);
}

Contributors

PersonTokensPropCommitsCommitProp
Yasuyuki Kozakai4896.00%150.00%
Joe Perches24.00%150.00%
Total50100.00%2100.00%

/* Print out the private part of the conntrack. */
/*
 * Emit the name of the current TCP state of @ct, followed by a space,
 * into seq file @s. The name is taken from tcp_conntrack_names[].
 */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
	const char *state_name = tcp_conntrack_names[ct->proto.tcp.state];

	seq_printf(s, "%s ", state_name);
}

Contributors

PersonTokensPropCommitsCommitProp
Yasuyuki Kozakai2573.53%125.00%
Florian Westphal617.65%125.00%
Steven Rostedt25.88%125.00%
Patrick McHardy12.94%125.00%
Total34100.00%4100.00%


/*
 * Classify a TCP header by its RST/SYN/FIN/ACK flags and return the
 * matching enum tcp_bit_set value.
 *
 * Precedence is RST, then SYN (split into SYN and SYN/ACK), then FIN,
 * then ACK; a header with none of these set yields TCP_NONE_SET.
 */
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
{
	if (tcph->rst)
		return TCP_RST_SET;

	if (tcph->syn) {
		if (tcph->ack)
			return TCP_SYNACK_SET;
		return TCP_SYN_SET;
	}

	if (tcph->fin)
		return TCP_FIN_SET;

	if (tcph->ack)
		return TCP_ACK_SET;

	return TCP_NONE_SET;
}

Contributors

PersonTokensPropCommitsCommitProp
Yasuyuki Kozakai64100.00%1100.00%
Total64100.00%1100.00%

/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
   in IP Filter' by Guido van Rooij.

   http://www.sane.nl/events/sane2000/papers.html
   http://www.darkart.com/mirrors/www.obfuscation.org/ipf/

   The boundaries and the conditions are changed according to RFC793:
   the packet must intersect the window (i.e. segments may be
   after the right or before the left edge) and thus receivers may ACK
   segments after the right edge of the window.

	td_maxend = max(sack + max(win,1)) seen in reply packets
	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
	td_maxwin += seq + len - sender.td_maxend
			if seq + len > sender.td_maxend
	td_end    = max(seq + len) seen in sent packets

   I.   Upper bound for valid data:     seq <= sender.td_maxend
   II.  Lower bound for valid data:     seq + len >= sender.td_end - receiver.td_maxwin
   III. Upper bound for valid (s)ack:   sack <= receiver.td_end
   IV.  Lower bound for valid (s)ack:   sack >= receiver.td_end - MAXACKWINDOW

   where sack is the highest right edge of sack block found in the packet
   or ack in the case of packet without SACK option.

   The upper bound limit for a valid (s)ack is not ignored -
   we don't have to deal with fragments.
*/
/*
 * Compute the sequence number just past the end of a segment:
 * @seq plus the payload length (@len minus @dataoff minus the TCP header
 * length from tcph->doff), plus one each for SYN and FIN, which both
 * occupy a sequence number. The result wraps modulo 2^32.
 */
static inline __u32 segment_seq_plus_len(__u32 seq,
					 size_t len,
					 unsigned int dataoff,
					 const struct tcphdr *tcph)
{
	/* XXX Should I use payload length field in IP/IPv6 header ?
	 * - YK */
	__u32 end = seq + len - dataoff - tcph->doff*4;

	if (tcph->syn)
		end++;
	if (tcph->fin)
		end++;

	return end;
}

Contributors

PersonTokensPropCommitsCommitProp
Yasuyuki Kozakai5898.31%150.00%
Jan Engelhardt11.69%150.00%
Total59100.00%2100.00%

/* Fixme: what about big packets? */
/* Minimum size of the window in which an (s)ack is still accepted. */
#define MAXACKWINCONST			66000
/*
 * Acceptable ACK window for @sender: its largest seen window, but never
 * less than MAXACKWINCONST. Note that @sender is evaluated more than once.
 */
#define MAXACKWINDOW(sender)						\
	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
					      : MAXACKWINCONST)

/*
 * Simplified tcp_parse_options routine from tcp_input.c
 */
static void tcp_options(const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, struct ip_ct_tcp_state *state) { unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; const unsigned char *ptr; int length = (tcph->doff*4) - sizeof(struct tcphdr); if (!length) return; ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), length, buff); BUG_ON(ptr == NULL); state->td_scale = state->flags = 0; while (length > 0) { int opcode=*ptr++; int opsize; switch (opcode) { case TCPOPT_EOL: return; case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ length--; continue; default: if (length < 2) return; opsize=*ptr++; if (opsize < 2) /* "silly options" */ return; if (opsize > length) return; /* don't parse partial options */ if (opcode == TCPOPT_SACK_PERM && opsize == TCPOLEN_SACK_PERM) state->flags |= IP_CT_TCP_FLAG_SACK_PERM; else if (opcode == TCPOPT_WINDOW && opsize == TCPOLEN_WINDOW) { state->td_scale = *(u_int8_t *)ptr; if (state->td_scale > 14) { /* See RFC1323 */ state->td_scale = 14; } state->flags |= IP_CT_TCP_FLAG_WINDOW_SCALE; } ptr += opsize - 2; length -= opsize; } } }

Contributors

PersonTokensPropCommitsCommitProp
Yasuyuki Kozakai23895.97%125.00%
Jozsef Kadlecsik83.23%250.00%
Jan Engelhardt20.81%125.00%
Total248100.00%4100.00%


/*
 * Look for a SACK option in the TCP header at @dataoff in @skb and raise
 * *@sack to the highest right edge of any SACK block that lies after the
 * current value. The caller pre-loads *@sack (with the ACK number).
 *
 * *@sack is left unchanged when there are no options, when the option
 * bytes cannot be read, when the only option is an aligned timestamp, or
 * when no well-formed SACK option is found. Only the first SACK option is
 * evaluated.
 */
static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
		     const struct tcphdr *tcph, __u32 *sack)
{
	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
	const unsigned char *ptr;
	int length = (tcph->doff*4) - sizeof(struct tcphdr);
	__u32 tmp;

	/* No options, or a bogus data offset smaller than the base header. */
	if (length <= 0)
		return;

	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
				 length, buff);
	/*
	 * The option bytes are packet-controlled; if they cannot be read
	 * keep the caller's value instead of crashing the kernel.
	 */
	if (!ptr)
		return;

	/* Fast path for timestamp-only option */
	if (length == TCPOLEN_TSTAMP_ALIGNED
	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
				       | (TCPOPT_NOP << 16)
				       | (TCPOPT_TIMESTAMP << 8)
				       | TCPOLEN_TIMESTAMP))
		return;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize, i;

		switch (opcode) {
		case TCPOPT_EOL:
			return;
		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
			length--;
			continue;
		default:
			if (length < 2)
				return;
			opsize = *ptr++;
			if (opsize < 2) /* "silly options" */
				return;
			if (opsize > length)
				return;	/* don't parse partial options */

			if (opcode == TCPOPT_SACK
			    && opsize >= (TCPOLEN_SACK_BASE
					  + TCPOLEN_SACK_PERBLOCK)
			    && !((opsize - TCPOLEN_SACK_BASE)
				 % TCPOLEN_SACK_PERBLOCK)) {
				for (i = 0;
				     i < (opsize - TCPOLEN_SACK_BASE);
				     i += TCPOLEN_SACK_PERBLOCK) {
					/* +1: second word = right edge */
					tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);

					if (after(tmp, *sack))
						*sack = tmp;
				}
				return;
			}
			/* ptr already consumed kind and length bytes */
			ptr += opsize - 2;
			length -= opsize;
		}
	}
}

Contributors

PersonTokensPropCommitsCommitProp
Yasuyuki Kozakai28094.28%114.29%
Jozsef Kadlecsik82.69%228.57%
Patrick McHardy51.68%114.29%
Jan Engelhardt20.67%114.29%
Mark H. Weaver10.34%114.29%
Hideaki Yoshifuji / 吉藤英明10.34%114.29%
Total297100.00%7100.00%


/*
 * tcp_in_window - check a segment against the tracked TCP windows.
 *
 * @ct:      conntrack entry the packet belongs to
 * @state:   TCP tracking state; seen[dir] is the sender, seen[!dir] the receiver
 * @dir:     direction of the packet
 * @index:   flag class of the segment (enum tcp_bit_set); only used here to
 *           restrict the retransmission counter to TCP_ACK_SET segments
 * @skb:     the packet
 * @dataoff: offset of the TCP header in @skb
 * @tcph:    the TCP header
 * @pf:      protocol family, passed on to nf_log_packet()
 *
 * Extracts seq/ack/sack/win/end from the packet (ack and sack are corrected
 * by nf_ct_seq_offset()), initializes the sender side if nothing has been
 * recorded for it yet (td_maxwin == 0), and then evaluates the four bounds
 * I-IV. A bare SYN that initializes the sender (simultaneous open) returns
 * true right away.
 *
 * When all four bounds hold, the sender/receiver window data and the
 * retransmission bookkeeping in @state are updated and true is returned.
 * Otherwise nothing is updated; the result is true only if the sender has
 * IP_CT_TCP_FLAG_BE_LIBERAL set or tn->tcp_be_liberal is non-zero, else
 * false, in which case the reason is logged when LOG_INVALID() allows it.
 */
static bool tcp_in_window(const struct nf_conn *ct, struct ip_ct_tcp *state, enum ip_conntrack_dir dir, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, u_int8_t pf) { struct net *net = nf_ct_net(ct); struct nf_tcp_net *tn = tcp_pernet(net); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; __u32 seq, ack, sack, end, win, swin; s32 receiver_offset; bool res, in_recv_win; /* * Get the required data from the packet. */ seq = ntohl(tcph->seq); ack = sack = ntohl(tcph->ack_seq); win = ntohs(tcph->window); end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) tcp_sack(skb, dataoff, tcph, &sack); /* Take into account NAT sequence number mangling */ receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1); ack -= receiver_offset; sack -= receiver_offset; pr_debug("tcp_in_window: START\n"); pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); if (sender->td_maxwin == 0) { /* * Initialize sender data. */ if (tcph->syn) { /* * SYN-ACK in reply to a SYN * or SYN from reply direction in simultaneous open. */ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); /* * RFC 1323: * Both sides must send the Window Scale option * to enable window scaling in either direction. 
*/ if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; if (!tcph->ack) /* Simultaneous open */ return true; } else { /* * We are in the middle of a connection, * its history is lost for us. * Let's try to use the data from the packet. */ sender->td_end = end; swin = win << sender->td_scale; sender->td_maxwin = (swin == 0 ? 1 : swin); sender->td_maxend = end + sender->td_maxwin; /* * We haven't seen traffic in the other direction yet * but we have to tweak window tracking to pass III * and IV until that happens. */ if (receiver->td_maxwin == 0) receiver->td_end = receiver->td_maxend = sack; } } else if (((state->state == TCP_CONNTRACK_SYN_SENT && dir == IP_CT_DIR_ORIGINAL) || (state->state == TCP_CONNTRACK_SYN_RECV && dir == IP_CT_DIR_REPLY)) && after(end, sender->td_end)) { /* * RFC 793: "if a TCP is reinitialized ... then it need * not wait at all; it must only be sure to use sequence * numbers larger than those recently used." */ sender->td_end = sender->td_maxend = end; sender->td_maxwin = (win == 0 ? 1 : win); tcp_options(skb, dataoff, tcph, sender); } if (!(tcph->ack)) { /* * If there is no ACK, just pretend it was set and OK. */ ack = sack = receiver->td_end; } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == (TCP_FLAG_ACK|TCP_FLAG_RST)) && (ack == 0)) { /* * Broken TCP stacks, that set ACK in RST packets as well * with zero ack value. */ ack = sack = receiver->td_end; } if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT) /* * RST sent answering SYN. 
*/ seq = end = sender->td_end; pr_debug("tcp_in_window: "); nf_ct_dump_tuple(tuple); pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n", seq, ack, receiver_offset, sack, receiver_offset, win, end); pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); /* Is the ending sequence in the receive window (if available)? */ in_recv_win = !receiver->td_maxwin || after(end, sender->td_end - receiver->td_maxwin - 1); pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n", before(seq, sender->td_maxend + 1), (in_recv_win ? 1 : 0), before(sack, receiver->td_end + 1), after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)); if (before(seq, sender->td_maxend + 1) && in_recv_win && before(sack, receiver->td_end + 1) && after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { /* * Take into account window scaling (RFC 1323). */ if (!tcph->syn) win <<= sender->td_scale; /* * Update sender data. */ swin = win + (sack - ack); if (sender->td_maxwin < swin) sender->td_maxwin = swin; if (after(end, sender->td_end)) { sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; } if (tcph->ack) { if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { sender->td_maxack = ack; sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; } else if (after(ack, sender->td_maxack)) sender->td_maxack = ack; } /* * Update receiver data. */ if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) receiver->td_maxwin += end - sender->td_maxend; if (after(sack + win, receiver->td_maxend - 1)) { receiver->td_maxend = sack + win; if (win == 0) receiver->td_maxend++; } if (ack == receiver->td_end) receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; /* * Check retransmissions. 
*/ if (index == TCP_ACK_SET) { if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack && state->last_end == end && state->last_win == win) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; state->last_win = win; state->retrans = 0; } } res = true; } else { res = false; if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || tn->tcp_be_liberal) res = true; if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? in_recv_win ? before(sack, receiver->td_end + 1) ? after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" : "ACK is under the lower bound (possible overly delayed ACK)" : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); } pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " "receiver end=%u maxend=%u maxwin=%u\n", res, sender->td_end, sender->td_maxend, sender->td_maxwin, receiver->td_end, receiver->td_maxend, receiver->td_maxwin); return res; }

Contributors

PersonTokensPropCommitsCommitProp
Yasuyuki Kozakai90274.42%14.35%
Jozsef Kadlecsik1098.99%521.74%
Patrick McHardy786.44%521.74%
Yuchung Cheng332.72%14.35%
Pablo Neira Ayuso302.48%14.35%
Gao Feng151.24%28.70%
Alexey Dobriyan120.99%14.35%
George Hansper120.99%14.35%
Jan Engelhardt120.99%417.39%
Changli Gao50.41%14.35%
Hideaki Yoshifuji / 吉藤英明40.33%14.35%
Total1212100.00%23100.00%

/*
 * table of valid flag combinations - PUSH, ECE and CWR are always valid
 *
 * Indexed by the FIN/SYN/RST/ACK/URG bits of a TCP header; a combination
 * is acceptable when its entry is 1. Everything not listed is 0.
 */
static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
				 TCPHDR_URG) + 1] =
{
	[TCPHDR_SYN]				= 1,
	[TCPHDR_SYN|TCPHDR_URG]			= 1,
	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
	[TCPHDR_RST]				= 1,
	[TCPHDR_RST|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
	[TCPHDR_ACK]				= 1,
	[TCPHDR_ACK|TCPHDR_URG]			= 1,
};

/* Protect conntrack against broken packets. Code taken from ipt_unclean.c.  */
static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int8_t pf, unsigned int hooknum) { const struct tcphdr *th; struct tcphdr _tcph; unsigned int tcplen = skb->len - dataoff; u_int8_t tcpflags; /* Smaller that minimal TCP header? */ th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; } /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { if (LOG_INVALID(net, IPPROTO_TCP)