|
Gary Lin |
50946b |
From: John Fastabend <john.fastabend@gmail.com>
|
|
Gary Lin |
50946b |
Date: Thu, 1 Apr 2021 15:00:19 -0700
|
|
Gary Lin |
50946b |
Subject: bpf, sockmap: Fix sk->prot unhash op reset
|
|
Gary Lin |
50946b |
Patch-mainline: v5.12-rc7
|
|
Gary Lin |
50946b |
Git-commit: 1c84b33101c82683dee8b06761ca1f69e78c8ee7
|
|
Gary Lin |
50946b |
References: bsc#1155518
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
In '4da6a196f93b1' we fixed a potential unhash loop caused when
|
|
Gary Lin |
50946b |
a TLS socket in a sockmap was removed from the sockmap. This
|
|
Gary Lin |
50946b |
happened because the unhash operation on the TLS ctx continued
|
|
Gary Lin |
50946b |
to point at the sockmap implementation of unhash even though the
|
|
Gary Lin |
50946b |
psock has already been removed. The sockmap unhash handler when a
|
|
Gary Lin |
50946b |
psock is removed does the following,
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
void sock_map_unhash(struct sock *sk)
|
|
Gary Lin |
50946b |
{
|
|
Gary Lin |
50946b |
void (*saved_unhash)(struct sock *sk);
|
|
Gary Lin |
50946b |
struct sk_psock *psock;
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
rcu_read_lock();
|
|
Gary Lin |
50946b |
psock = sk_psock(sk);
|
|
Gary Lin |
50946b |
if (unlikely(!psock)) {
|
|
Gary Lin |
50946b |
rcu_read_unlock();
|
|
Gary Lin |
50946b |
if (sk->sk_prot->unhash)
|
|
Gary Lin |
50946b |
sk->sk_prot->unhash(sk);
|
|
Gary Lin |
50946b |
return;
|
|
Gary Lin |
50946b |
}
|
|
Gary Lin |
50946b |
[...]
|
|
Gary Lin |
50946b |
}
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
The unlikely() case is there to handle the case where psock is detached
|
|
Gary Lin |
50946b |
but the proto ops have not been updated yet. But, in the above case
|
|
Gary Lin |
50946b |
with TLS and removed psock we never fixed sk_prot->unhash() and unhash()
|
|
Gary Lin |
50946b |
points back to sock_map_unhash resulting in a loop. To fix this we added
|
|
Gary Lin |
50946b |
this bit of code,
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
static inline void sk_psock_restore_proto(struct sock *sk,
|
|
Gary Lin |
50946b |
struct sk_psock *psock)
|
|
Gary Lin |
50946b |
{
|
|
Gary Lin |
50946b |
sk->sk_prot->unhash = psock->saved_unhash;
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
This will set the sk_prot->unhash back to its saved value. This is the
|
|
Gary Lin |
50946b |
correct callback for a TLS socket that has been removed from the sock_map.
|
|
Gary Lin |
50946b |
Unfortunately, this also overwrites the unhash pointer for all psocks.
|
|
Gary Lin |
50946b |
We effectively break sockmap unhash handling for any future socks.
|
|
Gary Lin |
50946b |
Omitting the unhash operation will leave stale entries in the map if
|
|
Gary Lin |
50946b |
a socket transition through unhash, but does not do close() op.
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
To fix set unhash correctly before calling into tls_update. This way the
|
|
Gary Lin |
50946b |
TLS enabled socket will point to the saved unhash() handler.
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
Fixes: 4da6a196f93b1 ("bpf: Sockmap/tls, during free we may call tcp_bpf_unhash() in loop")
|
|
Gary Lin |
50946b |
Reported-by: Cong Wang <xiyou.wangcong@gmail.com>
|
|
Gary Lin |
50946b |
Reported-by: Lorenz Bauer <lmb@cloudflare.com>
|
|
Gary Lin |
50946b |
Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
|
|
Gary Lin |
50946b |
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
|
|
Gary Lin |
50946b |
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
|
|
Gary Lin |
50946b |
Link: https://lore.kernel.org/bpf/161731441904.68884.15593917809745631972.stgit@john-XPS-13-9370
|
|
Gary Lin |
50946b |
Acked-by: Gary Lin <glin@suse.com>
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
NOTE from Gary:
|
|
Gary Lin |
50946b |
* This patch is modified to fit SLE15-SP2.
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
---
|
|
Gary Lin |
50946b |
include/linux/skmsg.h | 8 ++++++--
|
|
Gary Lin |
50946b |
1 file changed, 6 insertions(+), 2 deletions(-)
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
--- a/include/linux/skmsg.h
|
|
Gary Lin |
50946b |
+++ b/include/linux/skmsg.h
|
|
Gary Lin |
50946b |
@@ -358,13 +358,17 @@ static inline void sk_psock_update_proto
|
|
Gary Lin |
50946b |
static inline void sk_psock_restore_proto(struct sock *sk,
|
|
Gary Lin |
50946b |
struct sk_psock *psock)
|
|
Gary Lin |
50946b |
{
|
|
Gary Lin |
50946b |
- sk->sk_prot->unhash = psock->saved_unhash;
|
|
Gary Lin |
50946b |
-
|
|
Gary Lin |
50946b |
if (psock->sk_proto) {
|
|
Gary Lin |
50946b |
struct inet_connection_sock *icsk = inet_csk(sk);
|
|
Gary Lin |
50946b |
bool has_ulp = !!icsk->icsk_ulp_data;
|
|
Gary Lin |
50946b |
|
|
Gary Lin |
50946b |
if (has_ulp) {
|
|
Gary Lin |
50946b |
+ /* TLS does not have an unhash proto in SW cases, but we need
|
|
Gary Lin |
50946b |
+ * to ensure we stop using the sock_map unhash routine because
|
|
Gary Lin |
50946b |
+ * the associated psock is being removed. So use the original
|
|
Gary Lin |
50946b |
+ * unhash handler.
|
|
Gary Lin |
50946b |
+ */
|
|
Gary Lin |
50946b |
+ WRITE_ONCE(sk->sk_prot->unhash, psock->saved_unhash);
|
|
Gary Lin |
50946b |
tcp_update_ulp(sk, psock->sk_proto,
|
|
Gary Lin |
50946b |
psock->saved_write_space);
|
|
Gary Lin |
50946b |
} else {
|