Michal Kubecek c36d39
From: Davide Caratti <dcaratti@redhat.com>
Michal Kubecek c36d39
Date: Fri, 20 Jan 2023 18:01:40 +0100
Michal Kubecek c36d39
Subject: act_mirred: use the backlog for nested calls to mirred ingress
Michal Kubecek c36d39
Patch-mainline: v6.3-rc1
Michal Kubecek c36d39
Git-commit: ca22da2fbd693b54dc8e3b7b54ccc9f7e9ba3640
Michal Kubecek c36d39
References: CVE-2022-4269 bsc#1206024
Michal Kubecek c36d39
Michal Kubecek c36d39
William reports kernel soft-lockups on some OVS topologies when TC mirred
Michal Kubecek c36d39
egress->ingress action is hit by local TCP traffic [1].
Michal Kubecek c36d39
The same can also be reproduced with SCTP (thanks Xin for verifying), when
Michal Kubecek c36d39
client and server reach themselves through mirred egress to ingress, and
Michal Kubecek c36d39
one of the two peers sends a "heartbeat" packet (from within a timer).
Michal Kubecek c36d39
Michal Kubecek c36d39
Enqueueing to backlog proved to fix this soft lockup; however, as Cong
Michal Kubecek c36d39
noticed [2], we should preserve - when possible - the current mirred
Michal Kubecek c36d39
behavior that counts as "overlimits" any eventual packet drop subsequent to
Michal Kubecek c36d39
the mirred forwarding action [3]. A compromise solution might use the
Michal Kubecek c36d39
backlog only when tcf_mirred_act() has a nest level greater than one:
Michal Kubecek c36d39
change tcf_mirred_forward() accordingly.
Michal Kubecek c36d39
Michal Kubecek c36d39
Also, add a kselftest that can reproduce the lockup and verifies TC mirred
Michal Kubecek c36d39
ability to account for further packet drops after TC mirred egress->ingress
Michal Kubecek c36d39
(when the nest level is 1).
Michal Kubecek c36d39
Michal Kubecek c36d39
 [1] https://lore.kernel.org/netdev/33dc43f587ec1388ba456b4915c75f02a8aae226.1663945716.git.dcaratti@redhat.com/
Michal Kubecek c36d39
 [2] https://lore.kernel.org/netdev/Y0w%2FWWY60gqrtGLp@pop-os.localdomain/
Michal Kubecek c36d39
 [3] such behavior is not guaranteed: for example, if RPS or skb RX
Michal Kubecek c36d39
     timestamping is enabled on the mirred target device, the kernel
Michal Kubecek c36d39
     can defer receiving the skb and return NET_RX_SUCCESS inside
Michal Kubecek c36d39
     tcf_mirred_forward().
Michal Kubecek c36d39
Michal Kubecek c36d39
Reported-by: William Zhao <wizhao@redhat.com>
Michal Kubecek c36d39
CC: Xin Long <lucien.xin@gmail.com>
Michal Kubecek c36d39
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Michal Kubecek c36d39
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Michal Kubecek c36d39
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Michal Kubecek c36d39
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Michal Kubecek c36d39
Acked-by: Michal Kubecek <mkubecek@suse.cz>
Michal Kubecek c36d39
Michal Kubecek c36d39
---
Michal Kubecek c36d39
 net/sched/act_mirred.c                        |  7 +++
Michal Kubecek c36d39
 .../selftests/net/forwarding/tc_actions.sh    | 49 ++++++++++++++++++-
Michal Kubecek c36d39
 2 files changed, 55 insertions(+), 1 deletion(-)
Michal Kubecek c36d39
Michal Kubecek c36d39
--- a/net/sched/act_mirred.c
Michal Kubecek c36d39
+++ b/net/sched/act_mirred.c
Michal Kubecek c36d39
@@ -207,12 +207,19 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
Michal Kubecek c36d39
 	return err;
Michal Kubecek c36d39
 }
Michal Kubecek c36d39
 
Michal Kubecek c36d39
+static bool is_mirred_nested(void)
Michal Kubecek c36d39
+{
Michal Kubecek c36d39
+	return unlikely(__this_cpu_read(mirred_nest_level) > 1);
Michal Kubecek c36d39
+}
Michal Kubecek c36d39
+
Michal Kubecek c36d39
 static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
Michal Kubecek c36d39
 {
Michal Kubecek c36d39
 	int err;
Michal Kubecek c36d39
 
Michal Kubecek c36d39
 	if (!want_ingress)
Michal Kubecek c36d39
 		err = dev_queue_xmit(skb);
Michal Kubecek c36d39
+	else if (is_mirred_nested())
Michal Kubecek c36d39
+		err = netif_rx(skb);
Michal Kubecek c36d39
 	else
Michal Kubecek c36d39
 		err = netif_receive_skb(skb);
Michal Kubecek c36d39
 
Michal Kubecek c36d39
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
Michal Kubecek c36d39
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
Michal Kubecek c36d39
@@ -2,7 +2,8 @@
Michal Kubecek c36d39
 # SPDX-License-Identifier: GPL-2.0
Michal Kubecek c36d39
 
Michal Kubecek c36d39
 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
Michal Kubecek c36d39
-	mirred_egress_mirror_test gact_trap_test"
Michal Kubecek c36d39
+	mirred_egress_mirror_test gact_trap_test \
Michal Kubecek c36d39
+	mirred_egress_to_ingress_tcp_test"
Michal Kubecek c36d39
 NUM_NETIFS=4
Michal Kubecek c36d39
 source tc_common.sh
Michal Kubecek c36d39
 source lib.sh
Michal Kubecek c36d39
@@ -148,6 +149,52 @@ gact_trap_test()
Michal Kubecek c36d39
 	log_test "trap ($tcflags)"
Michal Kubecek c36d39
 }
Michal Kubecek c36d39
 
Michal Kubecek c36d39
+mirred_egress_to_ingress_tcp_test()
Michal Kubecek c36d39
+{
Michal Kubecek c36d39
+	local tmpfile=$(mktemp) tmpfile1=$(mktemp)
Michal Kubecek c36d39
+
Michal Kubecek c36d39
+	RET=0
Michal Kubecek c36d39
+	dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$tmpfile
Michal Kubecek c36d39
+	tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
Michal Kubecek c36d39
+		$tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \
Michal Kubecek c36d39
+			action ct commit nat src addr 192.0.2.2 pipe \
Michal Kubecek c36d39
+			action ct clear pipe \
Michal Kubecek c36d39
+			action ct commit nat dst addr 192.0.2.1 pipe \
Michal Kubecek c36d39
+			action ct clear pipe \
Michal Kubecek c36d39
+			action skbedit ptype host pipe \
Michal Kubecek c36d39
+			action mirred ingress redirect dev $h1
Michal Kubecek c36d39
+	tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \
Michal Kubecek c36d39
+		$tcflags ip_proto icmp \
Michal Kubecek c36d39
+			action mirred ingress redirect dev $h1
Michal Kubecek c36d39
+	tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \
Michal Kubecek c36d39
+		ip_proto icmp \
Michal Kubecek c36d39
+			action drop
Michal Kubecek c36d39
+
Michal Kubecek c36d39
+	ip vrf exec v$h1 nc --recv-only -w10 -l -p 12345 -o $tmpfile1  &
Michal Kubecek c36d39
+	local rpid=$!
Michal Kubecek c36d39
+	ip vrf exec v$h1 nc -w1 --send-only 192.0.2.2 12345 <$tmpfile
Michal Kubecek c36d39
+	wait -n $rpid
Michal Kubecek c36d39
+	cmp -s $tmpfile $tmpfile1
Michal Kubecek c36d39
+	check_err $? "server output check failed"
Michal Kubecek c36d39
+
Michal Kubecek c36d39
+	$MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \
Michal Kubecek c36d39
+		-t icmp "ping,id=42,seq=5" -q
Michal Kubecek c36d39
+	tc_check_packets "dev $h1 egress" 101 10
Michal Kubecek c36d39
+	check_err $? "didn't mirred redirect ICMP"
Michal Kubecek c36d39
+	tc_check_packets "dev $h1 ingress" 102 10
Michal Kubecek c36d39
+	check_err $? "didn't drop mirred ICMP"
Michal Kubecek c36d39
+	local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
Michal Kubecek c36d39
+	test ${overlimits} = 10
Michal Kubecek c36d39
+	check_err $? "wrong overlimits, expected 10 got ${overlimits}"
Michal Kubecek c36d39
+
Michal Kubecek c36d39
+	tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
Michal Kubecek c36d39
+	tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
Michal Kubecek c36d39
+	tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower
Michal Kubecek c36d39
+
Michal Kubecek c36d39
+	rm -f $tmpfile $tmpfile1
Michal Kubecek c36d39
+	log_test "mirred_egress_to_ingress_tcp ($tcflags)"
Michal Kubecek c36d39
+}
Michal Kubecek c36d39
+
Michal Kubecek c36d39
 setup_prepare()
Michal Kubecek c36d39
 {
Michal Kubecek c36d39
 	h1=${NETIFS[p1]}