Release 4.11 net/netfilter/nf_conntrack_proto_tcp.c
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
* (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
* (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/module.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/spinlock.h>
#include <linux/skbuff.h>
#include <linux/ipv6.h>
#include <net/ip6_checksum.h>
#include <asm/unaligned.h>
#include <net/tcp.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_log.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
static int nf_ct_tcp_be_liberal __read_mostly = 0;
/* If it is set to zero, we disable picking up already established
connections. */
static int nf_ct_tcp_loose __read_mostly = 1;
/* Max number of the retransmitted packets without receiving an (acceptable)
ACK from the destination. If this number is reached, a shorter timer
will be started. */
static int nf_ct_tcp_max_retrans __read_mostly = 3;
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR */
static const char *const tcp_conntrack_names[] = {
"NONE",
"SYN_SENT",
"SYN_RECV",
"ESTABLISHED",
"FIN_WAIT",
"CLOSE_WAIT",
"LAST_ACK",
"TIME_WAIT",
"CLOSE",
"SYN_SENT2",
};
#define SECS * HZ
#define MINS * 60 SECS
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
static unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] __read_mostly = {
[TCP_CONNTRACK_SYN_SENT] = 2 MINS,
[TCP_CONNTRACK_SYN_RECV] = 60 SECS,
[TCP_CONNTRACK_ESTABLISHED] = 5 DAYS,
[TCP_CONNTRACK_FIN_WAIT] = 2 MINS,
[TCP_CONNTRACK_CLOSE_WAIT] = 60 SECS,
[TCP_CONNTRACK_LAST_ACK] = 30 SECS,
[TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
[TCP_CONNTRACK_CLOSE] = 10 SECS,
[TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
/* RFC1122 says the R2 limit should be at least 100 seconds.
Linux uses 15 packets as limit, which corresponds
to ~13-30min depending on RTO. */
[TCP_CONNTRACK_RETRANS] = 5 MINS,
[TCP_CONNTRACK_UNACK] = 5 MINS,
};
#define sNO TCP_CONNTRACK_NONE
#define sSS TCP_CONNTRACK_SYN_SENT
#define sSR TCP_CONNTRACK_SYN_RECV
#define sES TCP_CONNTRACK_ESTABLISHED
#define sFW TCP_CONNTRACK_FIN_WAIT
#define sCW TCP_CONNTRACK_CLOSE_WAIT
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
#define sS2 TCP_CONNTRACK_SYN_SENT2
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE
/* What TCP flags are set from RST/SYN/FIN/ACK. */
enum tcp_bit_set {
TCP_SYN_SET,
TCP_SYNACK_SET,
TCP_FIN_SET,
TCP_ACK_SET,
TCP_RST_SET,
TCP_NONE_SET,
};
/*
* The TCP state transition table needs a few words...
*
* We are the man in the middle. All the packets go through us
* but might get lost in transit to the destination.
* It is assumed that the destinations can't receive segments
* we haven't seen.
*
* The checked segment is in window, but our windows are *not*
* equivalent with the ones of the sender/receiver. We always
* try to guess the state of the current sender.
*
* The meaning of the states are:
*
* NONE: initial state
* SYN_SENT: SYN-only packet seen
* SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
* SYN_RECV: SYN-ACK packet seen
* ESTABLISHED: ACK packet seen
* FIN_WAIT: FIN packet seen
* CLOSE_WAIT: ACK seen (after FIN)
* LAST_ACK: FIN seen (after FIN)
* TIME_WAIT: last ACK seen
* CLOSE: closed connection (RST)
*
* Packets marked as IGNORED (sIG):
* if they may be either invalid or valid
* and the receiver may send back a connection
* closing RST or a SYN/ACK.
*
* Packets marked as INVALID (sIV):
* if we regard them as truly invalid packets
*/
static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
/*
* sNO -> sSS Initialize a new connection
* sSS -> sSS Retransmitted SYN
* sS2 -> sS2 Late retransmitted SYN
* sSR -> sIG
* sES -> sIG Error: SYNs in window outside the SYN_SENT state
* are errors. Receiver will reply with RST
* and close the connection.
* Or we are not in sync and hold a dead connection.
* sFW -> sIG
* sCW -> sIG
* sLA -> sIG
* sTW -> sSS Reopened connection (RFC 1122).
* sCL -> sSS
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
/*
* sNO -> sIV Too late and no reason to do anything
* sSS -> sIV Client can't send SYN and then SYN/ACK
* sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
* sSR -> sSR Late retransmitted SYN/ACK in simultaneous open
* sES -> sIV Invalid SYN/ACK packets sent by the client
* sFW -> sIV
* sCW -> sIV
* sLA -> sIV
* sTW -> sIV
* sCL -> sIV
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
* sNO -> sIV Too late and no reason to do anything...
* sSS -> sIV Client migth not send FIN in this state:
* we enforce waiting for a SYN/ACK reply first.
* sS2 -> sIV
* sSR -> sFW Close started.
* sES -> sFW
* sFW -> sLA FIN seen in both directions, waiting for
* the last ACK.
* Migth be a retransmitted FIN as well...
* sCW -> sLA
* sLA -> sLA Retransmitted FIN. Remain in the same state.
* sTW -> sTW
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
* sNO -> sES Assumed.
* sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
* sS2 -> sIV
* sSR -> sES Established state is reached.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
* sCW -> sCW
* sLA -> sTW Last ACK detected (RFC5961 challenged)
* sTW -> sTW Retransmitted last ACK. Remain in the same state.
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
},
{
/* REPLY */
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
/*
* sNO -> sIV Never reached.
* sSS -> sS2 Simultaneous open
* sS2 -> sS2 Retransmitted simultaneous SYN
* sSR -> sIV Invalid SYN packets sent by the server
* sES -> sIV
* sFW -> sIV
* sCW -> sIV
* sLA -> sIV
* sTW -> sSS Reopened connection, but server may have switched role
* sCL -> sIV
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
* sSS -> sSR Standard open.
* sS2 -> sSR Simultaneous open
* sSR -> sIG Retransmitted SYN/ACK, ignore it.
* sES -> sIG Late retransmitted SYN/ACK?
* sFW -> sIG Might be SYN/ACK answering ignored SYN
* sCW -> sIG
* sLA -> sIG
* sTW -> sIG
* sCL -> sIG
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
* sSS -> sIV Server might not send FIN in this state.
* sS2 -> sIV
* sSR -> sFW Close started.
* sES -> sFW
* sFW -> sLA FIN seen in both directions.
* sCW -> sLA
* sLA -> sLA Retransmitted FIN.
* sTW -> sTW
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
/*
* sSS -> sIG Might be a half-open connection.
* sS2 -> sIG
* sSR -> sSR Might answer late resent SYN.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
* sCW -> sCW
* sLA -> sTW Last ACK detected (RFC5961 challenged)
* sTW -> sTW Retransmitted last ACK.
* sCL -> sCL
*/
/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
}
};
static inline struct nf_tcp_net *tcp_pernet(struct net *net)
{
return &net->ct.nf_ct_proto.tcp;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Gao Feng | 24 | 100.00% | 1 | 100.00% |
Total | 24 | 100.00% | 1 | 100.00% |
static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct tcphdr *hp;
struct tcphdr _hdr;
/* Actually only need first 4 bytes to get ports. */
hp = skb_header_pointer(skb, dataoff, 4, &_hdr);
if (hp == NULL)
return false;
tuple->src.u.tcp.port = hp->source;
tuple->dst.u.tcp.port = hp->dest;
return true;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Yasuyuki Kozakai | 76 | 83.52% | 1 | 20.00% |
Jan Engelhardt | 8 | 8.79% | 2 | 40.00% |
Eric W. Biedermann | 5 | 5.49% | 1 | 20.00% |
Gao Feng | 2 | 2.20% | 1 | 20.00% |
Total | 91 | 100.00% | 5 | 100.00% |
static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
tuple->src.u.tcp.port = orig->dst.u.tcp.port;
tuple->dst.u.tcp.port = orig->src.u.tcp.port;
return true;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Yasuyuki Kozakai | 58 | 96.67% | 1 | 50.00% |
Jan Engelhardt | 2 | 3.33% | 1 | 50.00% |
Total | 60 | 100.00% | 2 | 100.00% |
/* Print out the per-protocol part of the tuple. */
static void tcp_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
seq_printf(s, "sport=%hu dport=%hu ",
ntohs(tuple->src.u.tcp.port),
ntohs(tuple->dst.u.tcp.port));
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Yasuyuki Kozakai | 48 | 96.00% | 1 | 50.00% |
Joe Perches | 2 | 4.00% | 1 | 50.00% |
Total | 50 | 100.00% | 2 | 100.00% |
/* Print out the private part of the conntrack. */
static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Yasuyuki Kozakai | 25 | 73.53% | 1 | 25.00% |
Florian Westphal | 6 | 17.65% | 1 | 25.00% |
Steven Rostedt | 2 | 5.88% | 1 | 25.00% |
Patrick McHardy | 1 | 2.94% | 1 | 25.00% |
Total | 34 | 100.00% | 4 | 100.00% |
static unsigned int get_conntrack_index(const struct tcphdr *tcph)
{
if (tcph->rst) return TCP_RST_SET;
else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
else if (tcph->fin) return TCP_FIN_SET;
else if (tcph->ack) return TCP_ACK_SET;
else return TCP_NONE_SET;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Yasuyuki Kozakai | 64 | 100.00% | 1 | 100.00% |
Total | 64 | 100.00% | 1 | 100.00% |
/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
in IP Filter' by Guido van Rooij.
http://www.sane.nl/events/sane2000/papers.html
http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
The boundaries and the conditions are changed according to RFC793:
the packet must intersect the window (i.e. segments may be
after the right or before the left edge) and thus receivers may ACK
segments after the right edge of the window.
td_maxend = max(sack + max(win,1)) seen in reply packets
td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
td_maxwin += seq + len - sender.td_maxend
if seq + len > sender.td_maxend
td_end = max(seq + len) seen in sent packets
I. Upper bound for valid data: seq <= sender.td_maxend
II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
III. Upper bound for valid (s)ack: sack <= receiver.td_end
IV. Lower bound for valid (s)ack: sack >= receiver.td_end - MAXACKWINDOW
where sack is the highest right edge of sack block found in the packet
or ack in the case of packet without SACK option.
The upper bound limit for a valid (s)ack is not ignored -
we doesn't have to deal with fragments.
*/
static inline __u32 segment_seq_plus_len(__u32 seq,
size_t len,
unsigned int dataoff,
const struct tcphdr *tcph)
{
/* XXX Should I use payload length field in IP/IPv6 header ?
* - YK */
return (seq + len - dataoff - tcph->doff*4
+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Yasuyuki Kozakai | 58 | 98.31% | 1 | 50.00% |
Jan Engelhardt | 1 | 1.69% | 1 | 50.00% |
Total | 59 | 100.00% | 2 | 100.00% |
/* Fixme: what about big packets? */
#define MAXACKWINCONST 66000
#define MAXACKWINDOW(sender) \
((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
: MAXACKWINCONST)
/*
* Simplified tcp_parse_options routine from tcp_input.c
*/
static void tcp_options(const struct sk_buff *skb,
unsigned int dataoff,
const struct tcphdr *tcph,
struct ip_ct_tcp_state *state)
{
unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
const unsigned char *ptr;
int length = (tcph->doff*4) - sizeof(struct tcphdr);
if (!length)
return;
ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
length, buff);
BUG_ON(ptr == NULL);
state->td_scale =
state->flags = 0;
while (length > 0) {
int opcode=*ptr++;
int opsize;
switch (opcode) {
case TCPOPT_EOL:
return;
case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
length--;
continue;
default:
if (length < 2)
return;
opsize=*ptr++;
if (opsize < 2) /* "silly options" */
return;
if (opsize > length)
return; /* don't parse partial options */
if (opcode == TCPOPT_SACK_PERM
&& opsize == TCPOLEN_SACK_PERM)
state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
else if (opcode == TCPOPT_WINDOW
&& opsize == TCPOLEN_WINDOW) {
state->td_scale = *(u_int8_t *)ptr;
if (state->td_scale > 14) {
/* See RFC1323 */
state->td_scale = 14;
}
state->flags |=
IP_CT_TCP_FLAG_WINDOW_SCALE;
}
ptr += opsize - 2;
length -= opsize;
}
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Yasuyuki Kozakai | 238 | 95.97% | 1 | 25.00% |
Jozsef Kadlecsik | 8 | 3.23% | 2 | 50.00% |
Jan Engelhardt | 2 | 0.81% | 1 | 25.00% |
Total | 248 | 100.00% | 4 | 100.00% |
static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
const struct tcphdr *tcph, __u32 *sack)
{
unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
const unsigned char *ptr;
int length = (tcph->doff*4) - sizeof(struct tcphdr);
__u32 tmp;
if (!length)
return;
ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
length, buff);
BUG_ON(ptr == NULL);
/* Fast path for timestamp-only option */
if (length == TCPOLEN_TSTAMP_ALIGNED
&& *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
| (TCPOPT_NOP << 16)
| (TCPOPT_TIMESTAMP << 8)
| TCPOLEN_TIMESTAMP))
return;
while (length > 0) {
int opcode = *ptr++;
int opsize, i;
switch (opcode) {
case TCPOPT_EOL:
return;
case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
length--;
continue;
default:
if (length < 2)
return;
opsize = *ptr++;
if (opsize < 2) /* "silly options" */
return;
if (opsize > length)
return; /* don't parse partial options */
if (opcode == TCPOPT_SACK
&& opsize >= (TCPOLEN_SACK_BASE
+ TCPOLEN_SACK_PERBLOCK)
&& !((opsize - TCPOLEN_SACK_BASE)
% TCPOLEN_SACK_PERBLOCK)) {
for (i = 0;
i < (opsize - TCPOLEN_SACK_BASE);
i += TCPOLEN_SACK_PERBLOCK) {
tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
if (after(tmp, *sack))
*sack = tmp;
}
return;
}
ptr += opsize - 2;
length -= opsize;
}
}
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Yasuyuki Kozakai | 280 | 94.28% | 1 | 14.29% |
Jozsef Kadlecsik | 8 | 2.69% | 2 | 28.57% |
Patrick McHardy | 5 | 1.68% | 1 | 14.29% |
Jan Engelhardt | 2 | 0.67% | 1 | 14.29% |
Mark H. Weaver | 1 | 0.34% | 1 | 14.29% |
Hideaki Yoshifuji / 吉藤英明 | 1 | 0.34% | 1 | 14.29% |
Total | 297 | 100.00% | 7 | 100.00% |
static bool tcp_in_window(const struct nf_conn *ct,
struct ip_ct_tcp *state,
enum ip_conntrack_dir dir,
unsigned int index,
const struct sk_buff *skb,
unsigned int dataoff,
const struct tcphdr *tcph,
u_int8_t pf)
{
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = tcp_pernet(net);
struct ip_ct_tcp_state *sender = &state->seen[dir];
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin;
s32 receiver_offset;
bool res, in_recv_win;
/*
* Get the required data from the packet.
*/
seq = ntohl(tcph->seq);
ack = sack = ntohl(tcph->ack_seq);
win = ntohs(tcph->window);
end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
tcp_sack(skb, dataoff, tcph, &sack);
/* Take into account NAT sequence number mangling */
receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
ack -= receiver_offset;
sack -= receiver_offset;
pr_debug("tcp_in_window: START\n");
pr_debug("tcp_in_window: ");
nf_ct_dump_tuple(tuple);
pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
seq, ack, receiver_offset, sack, receiver_offset, win, end);
pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
sender->td_scale,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
if (sender->td_maxwin == 0) {
/*
* Initialize sender data.
*/
if (tcph->syn) {
/*
* SYN-ACK in reply to a SYN
* or SYN from reply direction in simultaneous open.
*/
sender->td_end =
sender->td_maxend = end;
sender->td_maxwin = (win == 0 ? 1 : win);
tcp_options(skb, dataoff, tcph, sender);
/*
* RFC 1323:
* Both sides must send the Window Scale option
* to enable window scaling in either direction.
*/
if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
&& receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
sender->td_scale =
receiver->td_scale = 0;
if (!tcph->ack)
/* Simultaneous open */
return true;
} else {
/*
* We are in the middle of a connection,
* its history is lost for us.
* Let's try to use the data from the packet.
*/
sender->td_end = end;
swin = win << sender->td_scale;
sender->td_maxwin = (swin == 0 ? 1 : swin);
sender->td_maxend = end + sender->td_maxwin;
/*
* We haven't seen traffic in the other direction yet
* but we have to tweak window tracking to pass III
* and IV until that happens.
*/
if (receiver->td_maxwin == 0)
receiver->td_end = receiver->td_maxend = sack;
}
} else if (((state->state == TCP_CONNTRACK_SYN_SENT
&& dir == IP_CT_DIR_ORIGINAL)
|| (state->state == TCP_CONNTRACK_SYN_RECV
&& dir == IP_CT_DIR_REPLY))
&& after(end, sender->td_end)) {
/*
* RFC 793: "if a TCP is reinitialized ... then it need
* not wait at all; it must only be sure to use sequence
* numbers larger than those recently used."
*/
sender->td_end =
sender->td_maxend = end;
sender->td_maxwin = (win == 0 ? 1 : win);
tcp_options(skb, dataoff, tcph, sender);
}
if (!(tcph->ack)) {
/*
* If there is no ACK, just pretend it was set and OK.
*/
ack = sack = receiver->td_end;
} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
(TCP_FLAG_ACK|TCP_FLAG_RST))
&& (ack == 0)) {
/*
* Broken TCP stacks, that set ACK in RST packets as well
* with zero ack value.
*/
ack = sack = receiver->td_end;
}
if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
/*
* RST sent answering SYN.
*/
seq = end = sender->td_end;
pr_debug("tcp_in_window: ");
nf_ct_dump_tuple(tuple);
pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
seq, ack, receiver_offset, sack, receiver_offset, win, end);
pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
sender->td_scale,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
/* Is the ending sequence in the receive window (if available)? */
in_recv_win = !receiver->td_maxwin ||
after(end, sender->td_end - receiver->td_maxwin - 1);
pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
before(seq, sender->td_maxend + 1),
(in_recv_win ? 1 : 0),
before(sack, receiver->td_end + 1),
after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
if (before(seq, sender->td_maxend + 1) &&
in_recv_win &&
before(sack, receiver->td_end + 1) &&
after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
/*
* Take into account window scaling (RFC 1323).
*/
if (!tcph->syn)
win <<= sender->td_scale;
/*
* Update sender data.
*/
swin = win + (sack - ack);
if (sender->td_maxwin < swin)
sender->td_maxwin = swin;
if (after(end, sender->td_end)) {
sender->td_end = end;
sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
}
if (tcph->ack) {
if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
sender->td_maxack = ack;
sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
} else if (after(ack, sender->td_maxack))
sender->td_maxack = ack;
}
/*
* Update receiver data.
*/
if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
receiver->td_maxwin += end - sender->td_maxend;
if (after(sack + win, receiver->td_maxend - 1)) {
receiver->td_maxend = sack + win;
if (win == 0)
receiver->td_maxend++;
}
if (ack == receiver->td_end)
receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
/*
* Check retransmissions.
*/
if (index == TCP_ACK_SET) {
if (state->last_dir == dir
&& state->last_seq == seq
&& state->last_ack == ack
&& state->last_end == end
&& state->last_win == win)
state->retrans++;
else {
state->last_dir = dir;
state->last_seq = seq;
state->last_ack = ack;
state->last_end = end;
state->last_win = win;
state->retrans = 0;
}
}
res = true;
} else {
res = false;
if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
tn->tcp_be_liberal)
res = true;
if (!res && LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: %s ",
before(seq, sender->td_maxend + 1) ?
in_recv_win ?
before(sack, receiver->td_end + 1) ?
after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
: "ACK is under the lower bound (possible overly delayed ACK)"
: "ACK is over the upper bound (ACKed data not seen yet)"
: "SEQ is under the lower bound (already ACKed data retransmitted)"
: "SEQ is over the upper bound (over the window of the receiver)");
}
pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
"receiver end=%u maxend=%u maxwin=%u\n",
res, sender->td_end, sender->td_maxend, sender->td_maxwin,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
return res;
}
Contributors
Person | Tokens | Prop | Commits | CommitProp |
Yasuyuki Kozakai | 902 | 74.42% | 1 | 4.35% |
Jozsef Kadlecsik | 109 | 8.99% | 5 | 21.74% |
Patrick McHardy | 78 | 6.44% | 5 | 21.74% |
Yuchung Cheng | 33 | 2.72% | 1 | 4.35% |
Pablo Neira Ayuso | 30 | 2.48% | 1 | 4.35% |
Gao Feng | 15 | 1.24% | 2 | 8.70% |
Alexey Dobriyan | 12 | 0.99% | 1 | 4.35% |
George Hansper | 12 | 0.99% | 1 | 4.35% |
Jan Engelhardt | 12 | 0.99% | 4 | 17.39% |
Changli Gao | 5 | 0.41% | 1 | 4.35% |
Hideaki Yoshifuji / 吉藤英明 | 4 | 0.33% | 1 | 4.35% |
Total | 1212 | 100.00% | 23 | 100.00% |
/* table of valid flag combinations - PUSH, ECE and CWR are always valid */
static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
TCPHDR_URG) + 1] =
{
[TCPHDR_SYN] = 1,
[TCPHDR_SYN|TCPHDR_URG] = 1,
[TCPHDR_SYN|TCPHDR_ACK] = 1,
[TCPHDR_RST] = 1,
[TCPHDR_RST|TCPHDR_ACK] = 1,
[TCPHDR_FIN|TCPHDR_ACK] = 1,
[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG] = 1,
[TCPHDR_ACK] = 1,
[TCPHDR_ACK|TCPHDR_URG] = 1,
};
/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
static int tcp_error(struct net *net, struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
u_int8_t pf,
unsigned int hooknum)
{
const struct tcphdr *th;
struct tcphdr _tcph;
unsigned int tcplen = skb->len - dataoff;
u_int8_t tcpflags;
/* Smaller that minimal TCP header? */
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
if (th == NULL) {
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: short packet ");
return -NF_ACCEPT;
}
/* Not whole TCP header or malformed packet */
if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
if (LOG_INVALID(net, IPPROTO_TCP)