Blob Blame History Raw
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Tue, 18 Feb 2020 17:10:20 +0000
Subject: bpf: Allow selecting reuseport socket from a SOCKMAP/SOCKHASH
Patch-mainline: v5.7-rc1
Git-commit: 9fed9000c5c6cacfcaaa48aff74818072ae294cc
References: bsc#1177028

SOCKMAP & SOCKHASH now support storing references to listening
sockets. Nothing keeps us from using these map types a collection of
sockets to select from in BPF reuseport programs. Whitelist the map types
with the bpf_sk_select_reuseport helper.

The restriction that the socket has to be a member of a reuseport group
still applies. Sockets in SOCKMAP/SOCKHASH that don't have sk_reuseport_cb
set are not a valid target and we signal it with -EINVAL.

The main benefit from this change is that, in contrast to
REUSEPORT_SOCKARRAY, SOCK{MAP,HASH} don't impose a restriction that a
listening socket can be just one BPF map at the same time.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200218171023.844439-9-jakub@cloudflare.com
Acked-by: Gary Lin <glin@suse.com>
---
 kernel/bpf/verifier.c |   10 +++++++---
 net/core/filter.c     |   15 ++++++++++-----
 2 files changed, 17 insertions(+), 8 deletions(-)

--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3693,14 +3693,16 @@ static int check_map_func_compatibility(
 		if (func_id != BPF_FUNC_sk_redirect_map &&
 		    func_id != BPF_FUNC_sock_map_update &&
 		    func_id != BPF_FUNC_map_delete_elem &&
-		    func_id != BPF_FUNC_msg_redirect_map)
+		    func_id != BPF_FUNC_msg_redirect_map &&
+		    func_id != BPF_FUNC_sk_select_reuseport)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_SOCKHASH:
 		if (func_id != BPF_FUNC_sk_redirect_hash &&
 		    func_id != BPF_FUNC_sock_hash_update &&
 		    func_id != BPF_FUNC_map_delete_elem &&
-		    func_id != BPF_FUNC_msg_redirect_hash)
+		    func_id != BPF_FUNC_msg_redirect_hash &&
+		    func_id != BPF_FUNC_sk_select_reuseport)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
@@ -3774,7 +3776,9 @@ static int check_map_func_compatibility(
 			goto error;
 		break;
 	case BPF_FUNC_sk_select_reuseport:
-		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY)
+		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
+		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
+		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
 			goto error;
 		break;
 	case BPF_FUNC_map_peek_elem:
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8646,6 +8646,7 @@ struct sock *bpf_run_sk_reuseport(struct
 BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
 	   struct bpf_map *, map, void *, key, u32, flags)
 {
+	bool is_sockarray = map->map_type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
 	struct sock_reuseport *reuse;
 	struct sock *selected_sk;
 
@@ -8654,12 +8655,16 @@ BPF_CALL_4(sk_select_reuseport, struct s
 		return -ENOENT;
 
 	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
-	if (!reuse)
-		/* selected_sk is unhashed (e.g. by close()) after the
-		 * above map_lookup_elem().  Treat selected_sk has already
-		 * been removed from the map.
+	if (!reuse) {
+		/* reuseport_array has only sk with non NULL sk_reuseport_cb.
+		 * The only (!reuse) case here is - the sk has already been
+		 * unhashed (e.g. by close()), so treat it as -ENOENT.
+		 *
+		 * Other maps (e.g. sock_map) do not provide this guarantee and
+		 * the sk may never be in the reuseport group to begin with.
 		 */
-		return -ENOENT;
+		return is_sockarray ? -ENOENT : -EINVAL;
+	}
 
 	if (unlikely(reuse->reuseport_id != reuse_kern->reuseport_id)) {
 		struct sock *sk;