Jiri Slaby 7a187a
From: Davide Caratti <dcaratti@redhat.com>
Jiri Slaby 7a187a
Date: Fri, 20 Jan 2023 18:01:40 +0100
Jiri Slaby 7a187a
Subject: [PATCH] act_mirred: use the backlog for nested calls to mirred
Jiri Slaby 7a187a
 ingress
Jiri Slaby 7a187a
References: bsc#1012628
Jiri Slaby 7a187a
Patch-mainline: 6.2.9
Jiri Slaby 7a187a
Git-commit: ca22da2fbd693b54dc8e3b7b54ccc9f7e9ba3640
Jiri Slaby 7a187a
Jiri Slaby 7a187a
[ Upstream commit ca22da2fbd693b54dc8e3b7b54ccc9f7e9ba3640 ]
Jiri Slaby 7a187a
Jiri Slaby 7a187a
William reports kernel soft-lockups on some OVS topologies when TC mirred
Jiri Slaby 7a187a
egress->ingress action is hit by local TCP traffic [1].
Jiri Slaby 7a187a
The same can also be reproduced with SCTP (thanks Xin for verifying), when
Jiri Slaby 7a187a
client and server reach themselves through mirred egress to ingress, and
Jiri Slaby 7a187a
one of the two peers sends a "heartbeat" packet (from within a timer).
Jiri Slaby 7a187a
Jiri Slaby 7a187a
Enqueueing to backlog proved to fix this soft lockup; however, as Cong
Jiri Slaby 7a187a
noticed [2], we should preserve - when possible - the current mirred
Jiri Slaby 7a187a
behavior that counts as "overlimits" any eventual packet drop subsequent to
Jiri Slaby 7a187a
the mirred forwarding action [3]. A compromise solution might use the
Jiri Slaby 7a187a
backlog only when tcf_mirred_act() has a nest level greater than one:
Jiri Slaby 7a187a
change tcf_mirred_forward() accordingly.
Jiri Slaby 7a187a
Jiri Slaby 7a187a
Also, add a kselftest that can reproduce the lockup and verifies TC mirred
Jiri Slaby 7a187a
ability to account for further packet drops after TC mirred egress->ingress
Jiri Slaby 7a187a
(when the nest level is 1).
Jiri Slaby 7a187a
Jiri Slaby 7a187a
 [1] https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/
Jiri Slaby 7a187a
 [2] https://lore.kernel.org/netdev/Y0w%2FWWY60gqrtGLp@pop-os.localdomain/
Jiri Slaby 7a187a
 [3] such behavior is not guaranteed: for example, if RPS or skb RX
Jiri Slaby 7a187a
     timestamping is enabled on the mirred target device, the kernel
Jiri Slaby 7a187a
     can defer receiving the skb and return NET_RX_SUCCESS inside
Jiri Slaby 7a187a
     tcf_mirred_forward().
Jiri Slaby 7a187a
Jiri Slaby 7a187a
Reported-by: William Zhao <wizhao@redhat.com>
Jiri Slaby 7a187a
CC: Xin Long <lucien.xin@gmail.com>
Jiri Slaby 7a187a
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Jiri Slaby 7a187a
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Jiri Slaby 7a187a
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Jiri Slaby 7a187a
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Jiri Slaby 7a187a
Signed-off-by: Sasha Levin <sashal@kernel.org>
Jiri Slaby 7a187a
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Jiri Slaby 7a187a
---
Jiri Slaby 7a187a
 net/sched/act_mirred.c                        |  7 +++
Jiri Slaby 7a187a
 .../selftests/net/forwarding/tc_actions.sh    | 49 ++++++++++++++++++-
Jiri Slaby 7a187a
 2 files changed, 55 insertions(+), 1 deletion(-)
Jiri Slaby 7a187a
Jiri Slaby 7a187a
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
Jiri Slaby 7a187a
index c8abb513..8037ec9b 100644
Jiri Slaby 7a187a
--- a/net/sched/act_mirred.c
Jiri Slaby 7a187a
+++ b/net/sched/act_mirred.c
Jiri Slaby 7a187a
@@ -206,12 +206,19 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
Jiri Slaby 7a187a
 	return err;
Jiri Slaby 7a187a
 }
Jiri Slaby 7a187a
 
Jiri Slaby 7a187a
+static bool is_mirred_nested(void)
Jiri Slaby 7a187a
+{
Jiri Slaby 7a187a
+	return unlikely(__this_cpu_read(mirred_nest_level) > 1);
Jiri Slaby 7a187a
+}
Jiri Slaby 7a187a
+
Jiri Slaby 7a187a
 static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
Jiri Slaby 7a187a
 {
Jiri Slaby 7a187a
 	int err;
Jiri Slaby 7a187a
 
Jiri Slaby 7a187a
 	if (!want_ingress)
Jiri Slaby 7a187a
 		err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
Jiri Slaby 7a187a
+	else if (is_mirred_nested())
Jiri Slaby 7a187a
+		err = netif_rx(skb);
Jiri Slaby 7a187a
 	else
Jiri Slaby 7a187a
 		err = netif_receive_skb(skb);
Jiri Slaby 7a187a
 
Jiri Slaby 7a187a
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
Jiri Slaby 7a187a
index 1e0a62f6..919c0dd9 100755
Jiri Slaby 7a187a
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
Jiri Slaby 7a187a
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
Jiri Slaby 7a187a
@@ -3,7 +3,8 @@
Jiri Slaby 7a187a
 
Jiri Slaby 7a187a
 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
Jiri Slaby 7a187a
 	mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
Jiri Slaby 7a187a
-	gact_trap_test mirred_egress_to_ingress_test"
Jiri Slaby 7a187a
+	gact_trap_test mirred_egress_to_ingress_test \
Jiri Slaby 7a187a
+	mirred_egress_to_ingress_tcp_test"
Jiri Slaby 7a187a
 NUM_NETIFS=4
Jiri Slaby 7a187a
 source tc_common.sh
Jiri Slaby 7a187a
 source lib.sh
Jiri Slaby 7a187a
@@ -198,6 +199,52 @@ mirred_egress_to_ingress_test()
Jiri Slaby 7a187a
 	log_test "mirred_egress_to_ingress ($tcflags)"
Jiri Slaby 7a187a
 }
Jiri Slaby 7a187a
 
Jiri Slaby 7a187a
+mirred_egress_to_ingress_tcp_test()
Jiri Slaby 7a187a
+{
Jiri Slaby 7a187a
+	local tmpfile=$(mktemp) tmpfile1=$(mktemp)
Jiri Slaby 7a187a
+
Jiri Slaby 7a187a
+	RET=0
Jiri Slaby 7a187a
+	dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$tmpfile
Jiri Slaby 7a187a
+	tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
Jiri Slaby 7a187a
+		$tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \
Jiri Slaby 7a187a
+			action ct commit nat src addr 192.0.2.2 pipe \
Jiri Slaby 7a187a
+			action ct clear pipe \
Jiri Slaby 7a187a
+			action ct commit nat dst addr 192.0.2.1 pipe \
Jiri Slaby 7a187a
+			action ct clear pipe \
Jiri Slaby 7a187a
+			action skbedit ptype host pipe \
Jiri Slaby 7a187a
+			action mirred ingress redirect dev $h1
Jiri Slaby 7a187a
+	tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \
Jiri Slaby 7a187a
+		$tcflags ip_proto icmp \
Jiri Slaby 7a187a
+			action mirred ingress redirect dev $h1
Jiri Slaby 7a187a
+	tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \
Jiri Slaby 7a187a
+		ip_proto icmp \
Jiri Slaby 7a187a
+			action drop
Jiri Slaby 7a187a
+
Jiri Slaby 7a187a
+	ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $tmpfile1  &
Jiri Slaby 7a187a
+	local rpid=$!
Jiri Slaby 7a187a
+	ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$tmpfile
Jiri Slaby 7a187a
+	wait -n $rpid
Jiri Slaby 7a187a
+	cmp -s $tmpfile $tmpfile1
Jiri Slaby 7a187a
+	check_err $? "server output check failed"
Jiri Slaby 7a187a
+
Jiri Slaby 7a187a
+	$MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \
Jiri Slaby 7a187a
+		-t icmp "ping,id=42,seq=5" -q
Jiri Slaby 7a187a
+	tc_check_packets "dev $h1 egress" 101 10
Jiri Slaby 7a187a
+	check_err $? "didn't mirred redirect ICMP"
Jiri Slaby 7a187a
+	tc_check_packets "dev $h1 ingress" 102 10
Jiri Slaby 7a187a
+	check_err $? "didn't drop mirred ICMP"
Jiri Slaby 7a187a
+	local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
Jiri Slaby 7a187a
+	test ${overlimits} = 10
Jiri Slaby 7a187a
+	check_err $? "wrong overlimits, expected 10 got ${overlimits}"
Jiri Slaby 7a187a
+
Jiri Slaby 7a187a
+	tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
Jiri Slaby 7a187a
+	tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
Jiri Slaby 7a187a
+	tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower
Jiri Slaby 7a187a
+
Jiri Slaby 7a187a
+	rm -f $tmpfile $tmpfile1
Jiri Slaby 7a187a
+	log_test "mirred_egress_to_ingress_tcp ($tcflags)"
Jiri Slaby 7a187a
+}
Jiri Slaby 7a187a
+
Jiri Slaby 7a187a
 setup_prepare()
Jiri Slaby 7a187a
 {
Jiri Slaby 7a187a
 	h1=${NETIFS[p1]}
Jiri Slaby 7a187a
-- 
Jiri Slaby 7a187a
2.35.3
Jiri Slaby 7a187a