Ali Abdallah 3e0da5
From c9361ff3a94a4d8f58fef53e1a5c68df5c107e6e Mon Sep 17 00:00:00 2001
Ali Abdallah 3e0da5
From: Ali Abdallah <aabdallah@suse.de>
Ali Abdallah 3e0da5
Date: Fri, 28 May 2021 11:14:28 +0200
Ali Abdallah 3e0da5
Subject: [PATCH] netfilter: conntrack: improve RST handling when tuple is
Ali Abdallah 3e0da5
 re-used
Michal Kubecek 9d8252
Patch-mainline: Submitted - 2021-05-20 - https://patchwork.ozlabs.org/project/netfilter-devel/patch/20210520105311.20745-1-fw@strlen.de/
Ali Abdallah 3e0da5
References: bsc#1183947 bsc#1185950
Ali Abdallah 3e0da5
Ali Abdallah 3e0da5
If we receive a SYN packet in original direction on an existing
Ali Abdallah 3e0da5
connection tracking entry, we let this SYN through because conntrack
Ali Abdallah 3e0da5
might be out-of-sync.
Ali Abdallah 3e0da5
Ali Abdallah 3e0da5
Conntrack gets back in sync when server responds with SYN/ACK and state
Ali Abdallah 3e0da5
gets updated accordingly.
Ali Abdallah 3e0da5
Ali Abdallah 3e0da5
However, if server replies with RST, this packet might be marked as
Ali Abdallah 3e0da5
INVALID because td_maxack value reflects the *old* conntrack state
Ali Abdallah 3e0da5
and not the state of the originator of the RST.
Ali Abdallah 3e0da5
Ali Abdallah 3e0da5
Avoid td_maxack-based checks if previous packet was a SYN.
Ali Abdallah 3e0da5
Ali Abdallah 3e0da5
Unfortunately that is not be enough: an out of order ACK in original
Ali Abdallah 3e0da5
direction updates last_index, so we still end up marking valid RST.
Ali Abdallah 3e0da5
Ali Abdallah 3e0da5
Thus disable the sequence check when we are not in established state and
Ali Abdallah 3e0da5
the received RST has a sequence of 0.
Ali Abdallah 3e0da5
Ali Abdallah 3e0da5
Because marking RSTs as invalid usually leads to unwanted timeouts,
Ali Abdallah 3e0da5
also skip RST sequence checks if a conntrack entry is already closing.
Ali Abdallah 3e0da5
Ali Abdallah 3e0da5
Such entries can already be evicted via GC in case the table is full.
Ali Abdallah 3e0da5
Ali Abdallah 3e0da5
Co-developed-by: Florian Westphal <fw@strlen.de>
Ali Abdallah 3e0da5
Signed-off-by: Florian Westphal <fw@strlen.de>
Ali Abdallah 3e0da5
Signed-off-by: Ali Abdallah <aabdallah@suse.de>
Ali Abdallah 3e0da5
Ali Abdallah 3e0da5
Acked-by: Ali Abdallah <aabdallah@suse.de>
Ali Abdallah 3e0da5
---
Ali Abdallah 3e0da5
 net/netfilter/nf_conntrack_proto_tcp.c | 53 +++++++++++++++++---------
Ali Abdallah 3e0da5
 1 file changed, 36 insertions(+), 17 deletions(-)
Ali Abdallah 3e0da5
Ali Abdallah 3e0da5
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
Ali Abdallah 3e0da5
index 0cf1b7805..9d825992c 100644
Ali Abdallah 3e0da5
--- a/net/netfilter/nf_conntrack_proto_tcp.c
Ali Abdallah 3e0da5
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
Ali Abdallah 3e0da5
@@ -833,6 +833,22 @@ static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
Ali Abdallah 3e0da5
 	       test_bit(IPS_ASSURED_BIT, &ct->status);
Ali Abdallah 3e0da5
 }
Ali Abdallah 3e0da5
 
Ali Abdallah 3e0da5
+static bool tcp_can_early_drop(const struct nf_conn *ct)
Ali Abdallah 3e0da5
+{
Ali Abdallah 3e0da5
+	switch (ct->proto.tcp.state) {
Ali Abdallah 3e0da5
+	case TCP_CONNTRACK_FIN_WAIT:
Ali Abdallah 3e0da5
+	case TCP_CONNTRACK_LAST_ACK:
Ali Abdallah 3e0da5
+	case TCP_CONNTRACK_TIME_WAIT:
Ali Abdallah 3e0da5
+	case TCP_CONNTRACK_CLOSE:
Ali Abdallah 3e0da5
+	case TCP_CONNTRACK_CLOSE_WAIT:
Ali Abdallah 3e0da5
+		return true;
Ali Abdallah 3e0da5
+	default:
Ali Abdallah 3e0da5
+		break;
Ali Abdallah 3e0da5
+	}
Ali Abdallah 3e0da5
+
Ali Abdallah 3e0da5
+	return false;
Ali Abdallah 3e0da5
+}
Ali Abdallah 3e0da5
+
Ali Abdallah 3e0da5
 /* Returns verdict for packet, or -1 for invalid. */
Ali Abdallah 3e0da5
 int nf_conntrack_tcp_packet(struct nf_conn *ct,
Ali Abdallah 3e0da5
 			    struct sk_buff *skb,
Ali Abdallah 3e0da5
@@ -1040,9 +1056,28 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
Ali Abdallah 3e0da5
 		if (index != TCP_RST_SET)
Ali Abdallah 3e0da5
 			break;
Ali Abdallah 3e0da5
 
Ali Abdallah 3e0da5
-		if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) {
Ali Abdallah 3e0da5
+		/* If we are closing, tuple might have been re-used already.
Ali Abdallah 3e0da5
+		 * last_index, last_ack, and all other ct fields used for
Ali Abdallah 3e0da5
+		 * sequence/window validation are outdated in that case.
Ali Abdallah 3e0da5
+		 *
Ali Abdallah 3e0da5
+		 * As the conntrack can already be expired by GC under pressure,
Ali Abdallah 3e0da5
+		 * just skip validation checks.
Ali Abdallah 3e0da5
+		 */
Ali Abdallah 3e0da5
+		if (tcp_can_early_drop(ct))
Ali Abdallah 3e0da5
+			goto in_window;
Ali Abdallah 3e0da5
+
Ali Abdallah 3e0da5
+		/* td_maxack might be outdated if we let a SYN through earlier */
Ali Abdallah 3e0da5
+		if ((ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) &&
Ali Abdallah 3e0da5
+		    ct->proto.tcp.last_index != TCP_SYN_SET) {
Ali Abdallah 3e0da5
 			u32 seq = ntohl(th->seq);
Ali Abdallah 3e0da5
 
Ali Abdallah 3e0da5
+			/* If we are not in established state and SEQ=0 this is most
Ali Abdallah 3e0da5
+			 * likely an answer to a SYN we let go through above (last_index
Ali Abdallah 3e0da5
+			 * can be updated due to out-of-order ACKs).
Ali Abdallah 3e0da5
+			 */
Ali Abdallah 3e0da5
+			if (seq == 0 && !nf_conntrack_tcp_established(ct))
Ali Abdallah 3e0da5
+				break;
Ali Abdallah 3e0da5
+
Ali Abdallah 3e0da5
 			if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
Ali Abdallah 3e0da5
 				/* Invalid RST  */
Ali Abdallah 3e0da5
 				spin_unlock_bh(&ct->lock);
Ali Abdallah 3e0da5
@@ -1165,22 +1200,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
Ali Abdallah 3e0da5
 	return NF_ACCEPT;
Ali Abdallah 3e0da5
 }
Ali Abdallah 3e0da5
 
Ali Abdallah 3e0da5
-static bool tcp_can_early_drop(const struct nf_conn *ct)
Ali Abdallah 3e0da5
-{
Ali Abdallah 3e0da5
-	switch (ct->proto.tcp.state) {
Ali Abdallah 3e0da5
-	case TCP_CONNTRACK_FIN_WAIT:
Ali Abdallah 3e0da5
-	case TCP_CONNTRACK_LAST_ACK:
Ali Abdallah 3e0da5
-	case TCP_CONNTRACK_TIME_WAIT:
Ali Abdallah 3e0da5
-	case TCP_CONNTRACK_CLOSE:
Ali Abdallah 3e0da5
-	case TCP_CONNTRACK_CLOSE_WAIT:
Ali Abdallah 3e0da5
-		return true;
Ali Abdallah 3e0da5
-	default:
Ali Abdallah 3e0da5
-		break;
Ali Abdallah 3e0da5
-	}
Ali Abdallah 3e0da5
-
Ali Abdallah 3e0da5
-	return false;
Ali Abdallah 3e0da5
-}
Ali Abdallah 3e0da5
-
Ali Abdallah 3e0da5
 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
Ali Abdallah 3e0da5
 
Ali Abdallah 3e0da5
 #include <linux/netfilter/nfnetlink.h>
Ali Abdallah 3e0da5
-- 
Ali Abdallah 3e0da5
2.26.2
Ali Abdallah 3e0da5