|
Ali Abdallah |
d58d31 |
From d3b0ce4663733c7fd9482ff742a7133ed5c3e22e Mon Sep 17 00:00:00 2001
|
|
Ali Abdallah |
d58d31 |
From: Ali Abdallah <aabdallah@suse.de>
|
|
Ali Abdallah |
d58d31 |
Date: Fri, 28 May 2021 09:03:25 +0200
|
|
Ali Abdallah |
d58d31 |
Patch-mainline: v5.1-rc1
|
|
Ali Abdallah |
d58d31 |
Git-commit: be0502a3f2e94211a8809a09ecbc3a017189b8fb
|
|
Ali Abdallah |
d58d31 |
References: bsc#1183947 bsc#1185950
|
|
Ali Abdallah |
d58d31 |
Subject: [PATCH] netfilter: conntrack: tcp: only close if RST matches exact
|
|
Ali Abdallah |
d58d31 |
sequence
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
TCP resets cause instant transition from established to closed state
|
|
Ali Abdallah |
d58d31 |
provided the reset is in-window. Endpoints that implement RFC 5961
|
|
Ali Abdallah |
d58d31 |
require resets to match the next expected sequence number.
|
|
Ali Abdallah |
d58d31 |
RST segments that are in-window (but that do not match RCV.NXT) are
|
|
Ali Abdallah |
d58d31 |
ignored, and a "challenge ACK" is sent back.
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
Main problem for conntrack is that its a middlebox, i.e. whereas an end
|
|
Ali Abdallah |
d58d31 |
host might have ACK'd SEQ (and would thus accept an RST with this
|
|
Ali Abdallah |
d58d31 |
sequence number), conntrack might not have seen this ACK (yet).
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
Therefore we can't simply flag RSTs with non-exact match as invalid.
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
This updates RST processing as follows:
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
1. If the connection is in a state other than ESTABLISHED, nothing is
|
|
Ali Abdallah |
d58d31 |
changed, RST is subject to normal in-window check.
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
2. If the RSTs sequence number either matches exactly RCV.NXT,
|
|
Ali Abdallah |
d58d31 |
connection state moves to CLOSE.
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
3. The same applies if the RST sequence number aligns with a previous
|
|
Ali Abdallah |
d58d31 |
packet in the same direction.
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
In all other cases, the connection remains in ESTABLISHED state.
|
|
Ali Abdallah |
d58d31 |
If the normal-in-window check passes, the timeout will be lowered
|
|
Ali Abdallah |
d58d31 |
to that of CLOSE.
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
If the peer sends a challenge ack, connection timeout will be reset.
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
If the challenge ACK triggers another RST (RST was valid after all),
|
|
Ali Abdallah |
d58d31 |
this 2nd RST will match expected sequence and conntrack state changes to
|
|
Ali Abdallah |
d58d31 |
CLOSE.
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
If no challenge ACK is received, the connection will time out after
|
|
Ali Abdallah |
d58d31 |
CLOSE seconds (10 seconds by default), just like without this patch.
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
Packetdrill test case:
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
0.000 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
|
|
Ali Abdallah |
d58d31 |
0.000 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
|
|
Ali Abdallah |
d58d31 |
0.000 bind(3, ..., ...) = 0
|
|
Ali Abdallah |
d58d31 |
0.000 listen(3, 1) = 0
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
0.100 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
|
|
Ali Abdallah |
d58d31 |
0.100 > S. 0:0(0) ack 1 win 64240 <mss 1460,nop,nop,sackOK,nop,wscale 7>
|
|
Ali Abdallah |
d58d31 |
0.200 < . 1:1(0) ack 1 win 257
|
|
Ali Abdallah |
d58d31 |
0.200 accept(3, ..., ...) = 4
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// Receive a segment.
|
|
Ali Abdallah |
d58d31 |
0.210 < P. 1:1001(1000) ack 1 win 46
|
|
Ali Abdallah |
d58d31 |
0.210 > . 1:1(0) ack 1001
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// Application writes 1000 bytes.
|
|
Ali Abdallah |
d58d31 |
0.250 write(4, ..., 1000) = 1000
|
|
Ali Abdallah |
d58d31 |
0.250 > P. 1:1001(1000) ack 1001
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// First reset, old sequence. Conntrack (correctly) considers this
|
|
Ali Abdallah |
d58d31 |
// invalid due to failed window validation (regardless of this patch).
|
|
Ali Abdallah |
d58d31 |
0.260 < R 2:2(0) ack 1001 win 260
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// 2nd reset, but too far ahead sequence. Same: correctly handled
|
|
Ali Abdallah |
d58d31 |
// as invalid.
|
|
Ali Abdallah |
d58d31 |
0.270 < R 99990001:99990001(0) ack 1001 win 260
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// in-window, but not exact sequence.
|
|
Ali Abdallah |
d58d31 |
// Current Linux kernels might reply with a challenge ack, and do not
|
|
Ali Abdallah |
d58d31 |
// remove connection.
|
|
Ali Abdallah |
d58d31 |
// Without this patch, conntrack state moves to CLOSE.
|
|
Ali Abdallah |
d58d31 |
// With patch, timeout is lowered like CLOSE, but connection stays
|
|
Ali Abdallah |
d58d31 |
// in ESTABLISHED state.
|
|
Ali Abdallah |
d58d31 |
0.280 < R 1010:1010(0) ack 1001 win 260
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// Expect challenge ACK
|
|
Ali Abdallah |
d58d31 |
0.281 > . 1001:1001(0) ack 1001 win 501
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// With or without this patch, RST will cause connection
|
|
Ali Abdallah |
d58d31 |
// to move to CLOSE (sequence number matches)
|
|
Ali Abdallah |
d58d31 |
// 0.282 < R 1001:1001(0) ack 1001 win 260
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// ACK
|
|
Ali Abdallah |
d58d31 |
0.300 < . 1001:1001(0) ack 1001 win 257
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// more data could be exchanged here, connection
|
|
Ali Abdallah |
d58d31 |
// is still established
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// Client closes the connection.
|
|
Ali Abdallah |
d58d31 |
0.610 < F. 1001:1001(0) ack 1001 win 260
|
|
Ali Abdallah |
d58d31 |
0.650 > . 1001:1001(0) ack 1002
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// Close the connection without reading outstanding data
|
|
Ali Abdallah |
d58d31 |
0.700 close(4) = 0
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
// so one more reset. Will be deemed acceptable with patch as well:
|
|
Ali Abdallah |
d58d31 |
// connection is already closing.
|
|
Ali Abdallah |
d58d31 |
0.701 > R. 1001:1001(0) ack 1002 win 501
|
|
Ali Abdallah |
d58d31 |
// End packetdrill test case.
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
With patch, this generates following conntrack events:
|
|
Ali Abdallah |
d58d31 |
[NEW] 120 SYN_SENT src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80 [UNREPLIED]
|
|
Ali Abdallah |
d58d31 |
[UPDATE] 60 SYN_RECV src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80
|
|
Ali Abdallah |
d58d31 |
[UPDATE] 432000 ESTABLISHED src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80 [ASSURED]
|
|
Ali Abdallah |
d58d31 |
[UPDATE] 120 FIN_WAIT src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80 [ASSURED]
|
|
Ali Abdallah |
d58d31 |
[UPDATE] 60 CLOSE_WAIT src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80 [ASSURED]
|
|
Ali Abdallah |
d58d31 |
[UPDATE] 10 CLOSE src=10.0.2.1 dst=10.0.0.1 sport=5437 dport=80 [ASSURED]
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
Without patch, first RST moves connection to close, whereas socket state
|
|
Ali Abdallah |
d58d31 |
does not change until FIN is received.
|
|
Ali Abdallah |
d58d31 |
[NEW] 120 SYN_SENT src=10.0.2.1 dst=10.0.0.1 sport=5141 dport=80 [UNREPLIED]
|
|
Ali Abdallah |
d58d31 |
[UPDATE] 60 SYN_RECV src=10.0.2.1 dst=10.0.0.1 sport=5141 dport=80
|
|
Ali Abdallah |
d58d31 |
[UPDATE] 432000 ESTABLISHED src=10.0.2.1 dst=10.0.0.1 sport=5141 dport=80 [ASSURED]
|
|
Ali Abdallah |
d58d31 |
[UPDATE] 10 CLOSE src=10.0.2.1 dst=10.0.0.1 sport=5141 dport=80 [ASSURED]
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
Cc: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
|
|
Ali Abdallah |
d58d31 |
Signed-off-by: Florian Westphal <fw@strlen.de>
|
|
Ali Abdallah |
d58d31 |
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
[This patch is required to easily backport another important fix for
|
|
Ali Abdallah |
d58d31 |
bsc#1183947 bsc#1185950]
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
Acked-by: Ali Abdallah <aabdallah@suse.de>
|
|
Ali Abdallah |
d58d31 |
---
|
|
Ali Abdallah |
d58d31 |
net/netfilter/nf_conntrack_proto_tcp.c | 52 ++++++++++++++++++++------
|
|
Ali Abdallah |
d58d31 |
1 file changed, 41 insertions(+), 11 deletions(-)
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
|
|
Ali Abdallah |
d58d31 |
index 804ff7de..3d12d82a 100644
|
|
Ali Abdallah |
d58d31 |
--- a/net/netfilter/nf_conntrack_proto_tcp.c
|
|
Ali Abdallah |
d58d31 |
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
|
|
Ali Abdallah |
d58d31 |
@@ -804,6 +804,12 @@ static unsigned int *tcp_get_timeouts(struct net *net)
|
|
Ali Abdallah |
d58d31 |
return tcp_pernet(net)->timeouts;
|
|
Ali Abdallah |
d58d31 |
}
|
|
Ali Abdallah |
d58d31 |
|
|
Ali Abdallah |
d58d31 |
+static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
|
|
Ali Abdallah |
d58d31 |
+{
|
|
Ali Abdallah |
d58d31 |
+ return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
|
|
Ali Abdallah |
d58d31 |
+ test_bit(IPS_ASSURED_BIT, &ct->status);
|
|
Ali Abdallah |
d58d31 |
+}
|
|
Ali Abdallah |
d58d31 |
+
|
|
Ali Abdallah |
d58d31 |
/* Returns verdict for packet, or -1 for invalid. */
|
|
Ali Abdallah |
d58d31 |
static int tcp_packet(struct nf_conn *ct,
|
|
Ali Abdallah |
d58d31 |
const struct sk_buff *skb,
|
|
Ali Abdallah |
d58d31 |
@@ -997,18 +1003,40 @@ static int tcp_packet(struct nf_conn *ct,
|
|
Ali Abdallah |
d58d31 |
}
|
|
Ali Abdallah |
d58d31 |
break;
|
|
Ali Abdallah |
d58d31 |
case TCP_CONNTRACK_CLOSE:
|
|
Ali Abdallah |
d58d31 |
- if (index == TCP_RST_SET
|
|
Ali Abdallah |
d58d31 |
- && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
|
|
Ali Abdallah |
d58d31 |
- && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
|
|
Ali Abdallah |
d58d31 |
- /* Invalid RST */
|
|
Ali Abdallah |
d58d31 |
- spin_unlock_bh(&ct->lock);
|
|
Ali Abdallah |
d58d31 |
- if (LOG_INVALID(net, IPPROTO_TCP))
|
|
Ali Abdallah |
d58d31 |
+ if (index != TCP_RST_SET)
|
|
Ali Abdallah |
d58d31 |
+ break;
|
|
Ali Abdallah |
d58d31 |
+
|
|
Ali Abdallah |
d58d31 |
+ if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) {
|
|
Ali Abdallah |
d58d31 |
+ u32 seq = ntohl(th->seq);
|
|
Ali Abdallah |
d58d31 |
+
|
|
Ali Abdallah |
d58d31 |
+ if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
|
|
Ali Abdallah |
d58d31 |
+ /* Invalid RST */
|
|
Ali Abdallah |
d58d31 |
+ spin_unlock_bh(&ct->lock);
|
|
Ali Abdallah |
d58d31 |
nf_log_packet(net, pf, 0, skb, NULL, NULL,
|
|
Ali Abdallah |
d58d31 |
NULL, "nf_ct_tcp: invalid RST ");
|
|
Ali Abdallah |
d58d31 |
- return -NF_ACCEPT;
|
|
Ali Abdallah |
d58d31 |
- }
|
|
Ali Abdallah |
d58d31 |
- if (index == TCP_RST_SET
|
|
Ali Abdallah |
d58d31 |
- && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
|
|
Ali Abdallah |
d58d31 |
+ return -NF_ACCEPT;
|
|
Ali Abdallah |
d58d31 |
+ }
|
|
Ali Abdallah |
d58d31 |
+
|
|
Ali Abdallah |
d58d31 |
+ if (!nf_conntrack_tcp_established(ct) ||
|
|
Ali Abdallah |
d58d31 |
+ seq == ct->proto.tcp.seen[!dir].td_maxack)
|
|
Ali Abdallah |
d58d31 |
+ break;
|
|
Ali Abdallah |
d58d31 |
+
|
|
Ali Abdallah |
d58d31 |
+ /* Check if rst is part of train, such as
|
|
Ali Abdallah |
d58d31 |
+ * foo:80 > bar:4379: P, 235946583:235946602(19) ack 42
|
|
Ali Abdallah |
d58d31 |
+ * foo:80 > bar:4379: R, 235946602:235946602(0) ack 42
|
|
Ali Abdallah |
d58d31 |
+ */
|
|
Ali Abdallah |
d58d31 |
+ if (ct->proto.tcp.last_index == TCP_ACK_SET &&
|
|
Ali Abdallah |
d58d31 |
+ ct->proto.tcp.last_dir == dir &&
|
|
Ali Abdallah |
d58d31 |
+ seq == ct->proto.tcp.last_end)
|
|
Ali Abdallah |
d58d31 |
+ break;
|
|
Ali Abdallah |
d58d31 |
+
|
|
Ali Abdallah |
d58d31 |
+ /* ... RST sequence number doesn't match exactly, keep
|
|
Ali Abdallah |
d58d31 |
+ * established state to allow a possible challenge ACK.
|
|
Ali Abdallah |
d58d31 |
+ */
|
|
Ali Abdallah |
d58d31 |
+ new_state = old_state;
|
|
Ali Abdallah |
d58d31 |
+ }
|
|
Ali Abdallah |
d58d31 |
+
|
|
Ali Abdallah |
d58d31 |
+ if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
|
|
Ali Abdallah |
d58d31 |
&& ct->proto.tcp.last_index == TCP_SYN_SET)
|
|
Ali Abdallah |
d58d31 |
|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
|
|
Ali Abdallah |
d58d31 |
&& ct->proto.tcp.last_index == TCP_ACK_SET))
|
|
Ali Abdallah |
d58d31 |
@@ -1024,7 +1052,7 @@ static int tcp_packet(struct nf_conn *ct,
|
|
Ali Abdallah |
d58d31 |
* segments we ignored. */
|
|
Ali Abdallah |
d58d31 |
goto in_window;
|
|
Ali Abdallah |
d58d31 |
}
|
|
Ali Abdallah |
d58d31 |
- /* Just fall through */
|
|
Ali Abdallah |
d58d31 |
+ break;
|
|
Ali Abdallah |
d58d31 |
default:
|
|
Ali Abdallah |
d58d31 |
/* Keep compilers happy. */
|
|
Ali Abdallah |
d58d31 |
break;
|
|
Ali Abdallah |
d58d31 |
@@ -1055,6 +1083,8 @@ static int tcp_packet(struct nf_conn *ct,
|
|
Ali Abdallah |
d58d31 |
if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
|
|
Ali Abdallah |
d58d31 |
timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
|
|
Ali Abdallah |
d58d31 |
timeout = timeouts[TCP_CONNTRACK_RETRANS];
|
|
Ali Abdallah |
d58d31 |
+ else if (unlikely(index == TCP_RST_SET))
|
|
Ali Abdallah |
d58d31 |
+ timeout = timeouts[TCP_CONNTRACK_CLOSE];
|
|
Ali Abdallah |
d58d31 |
else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
|
|
Ali Abdallah |
d58d31 |
IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
|
|
Ali Abdallah |
d58d31 |
timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
|
|
Ali Abdallah |
d58d31 |
--
|
|
Ali Abdallah |
d58d31 |
2.26.2
|
|
Ali Abdallah |
d58d31 |
|