From: Eric Dumazet <edumazet@google.com>
Date: Thu, 10 Oct 2019 20:17:46 -0700
Subject: tcp: annotate sk->sk_wmem_queued lockless reads
Patch-mainline: v5.4-rc4
Git-commit: ab4e846a82d0ae00176de19f2db3c5c64f8eb5f2
References: bsc#1154353
For the sake of tcp_poll(), there are few places where we fetch
sk->sk_wmem_queued while this field can change from IRQ or other cpu.
We need to add READ_ONCE() annotations, and also make sure write
sides use corresponding WRITE_ONCE() to avoid store-tearing.
sk_wmem_queued_add() helper is added so that we can in
the future convert to ADD_ONCE() or equivalent if/when
available.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Thomas Bogendoerfer <tbogendoerfer@suse.de>
---
include/net/sock.h | 15 ++++++++++-----
include/trace/events/sock.h | 2 +-
net/core/datagram.c | 2 +-
net/core/sock.c | 2 +-
net/ipv4/inet_diag.c | 2 +-
net/ipv4/tcp.c | 4 ++--
net/ipv4/tcp_output.c | 14 +++++++-------
net/sched/em_meta.c | 2 +-
8 files changed, 24 insertions(+), 19 deletions(-)
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -878,12 +878,17 @@ static inline bool sk_acceptq_is_full(co
*/
static inline int sk_stream_min_wspace(const struct sock *sk)
{
- return sk->sk_wmem_queued >> 1;
+ return READ_ONCE(sk->sk_wmem_queued) >> 1;
}
static inline int sk_stream_wspace(const struct sock *sk)
{
- return READ_ONCE(sk->sk_sndbuf) - sk->sk_wmem_queued;
+ return READ_ONCE(sk->sk_sndbuf) - READ_ONCE(sk->sk_wmem_queued);
+}
+
+static inline void sk_wmem_queued_add(struct sock *sk, int val)
+{
+ WRITE_ONCE(sk->sk_wmem_queued, sk->sk_wmem_queued + val);
}
void sk_stream_write_space(struct sock *sk);
@@ -1207,7 +1212,7 @@ static inline void sk_refcnt_debug_relea
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
- if (sk->sk_wmem_queued >= READ_ONCE(sk->sk_sndbuf))
+ if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
return false;
return sk->sk_prot->stream_memory_free ?
@@ -1467,7 +1472,7 @@ DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cach
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
- sk->sk_wmem_queued -= skb->truesize;
+ sk_wmem_queued_add(sk, -skb->truesize);
sk_mem_uncharge(sk, skb->truesize);
if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
@@ -2014,7 +2019,7 @@ static inline int skb_copy_to_page_nocac
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- sk->sk_wmem_queued += copy;
+ sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
return 0;
}
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -115,7 +115,7 @@ TRACE_EVENT(sock_exceed_buf_limit,
__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
__entry->sysctl_wmem = sk_get_wmem0(sk, prot);
__entry->wmem_alloc = refcount_read(&sk->sk_wmem_alloc);
- __entry->wmem_queued = sk->sk_wmem_queued;
+ __entry->wmem_queued = READ_ONCE(sk->sk_wmem_queued);
__entry->kind = kind;
),
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -640,7 +640,7 @@ int __zerocopy_sg_from_iter(struct sock
skb->len += copied;
skb->truesize += truesize;
if (sk && sk->sk_type == SOCK_STREAM) {
- sk->sk_wmem_queued += truesize;
+ sk_wmem_queued_add(sk, truesize);
sk_mem_charge(sk, truesize);
} else {
refcount_add(truesize, &skb->sk->sk_wmem_alloc);
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3212,7 +3212,7 @@ void sk_get_meminfo(const struct sock *s
mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
- mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+ mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -193,7 +193,7 @@ int inet_sk_diag_fill(struct sock *sk, s
if (ext & (1 << (INET_DIAG_MEMINFO - 1))) {
struct inet_diag_meminfo minfo = {
.idiag_rmem = sk_rmem_alloc_get(sk),
- .idiag_wmem = sk->sk_wmem_queued,
+ .idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
.idiag_fmem = sk->sk_forward_alloc,
.idiag_tmem = sk_wmem_alloc_get(sk),
};
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -659,7 +659,7 @@ static void skb_entail(struct sock *sk,
tcb->sacked = 0;
__skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
- sk->sk_wmem_queued += skb->truesize;
+ sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
if (tp->nonagle & TCP_NAGLE_PUSH)
tp->nonagle &= ~TCP_NAGLE_PUSH;
@@ -1034,7 +1034,7 @@ new_segment:
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- sk->sk_wmem_queued += copy;
+ sk_wmem_queued_add(sk, copy);
sk_mem_charge(sk, copy);
skb->ip_summed = CHECKSUM_PARTIAL;
WRITE_ONCE(tp->write_seq, tp->write_seq + copy);
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1188,7 +1188,7 @@ static void tcp_queue_skb(struct sock *s
WRITE_ONCE(tp->write_seq, TCP_SKB_CB(skb)->end_seq);
__skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
- sk->sk_wmem_queued += skb->truesize;
+ sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
}
@@ -1322,7 +1322,7 @@ int tcp_fragment(struct sock *sk, enum t
return -ENOMEM; /* We'll just try again later. */
skb_copy_decrypted(buff, skb);
- sk->sk_wmem_queued += buff->truesize;
+ sk_wmem_queued_add(sk, buff->truesize);
sk_mem_charge(sk, buff->truesize);
nlen = skb->len - len - nsize;
buff->truesize += nlen;
@@ -1432,7 +1432,7 @@ int tcp_trim_head(struct sock *sk, struc
if (delta_truesize) {
skb->truesize -= delta_truesize;
- sk->sk_wmem_queued -= delta_truesize;
+ sk_wmem_queued_add(sk, -delta_truesize);
sk_mem_uncharge(sk, delta_truesize);
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
}
@@ -1877,7 +1877,7 @@ static int tso_fragment(struct sock *sk,
return -ENOMEM;
skb_copy_decrypted(buff, skb);
- sk->sk_wmem_queued += buff->truesize;
+ sk_wmem_queued_add(sk, buff->truesize);
sk_mem_charge(sk, buff->truesize);
buff->truesize += nlen;
skb->truesize -= nlen;
@@ -2141,7 +2141,7 @@ static int tcp_mtu_probe(struct sock *sk
nskb = sk_stream_alloc_skb(sk, probe_size, GFP_ATOMIC, false);
if (!nskb)
return -1;
- sk->sk_wmem_queued += nskb->truesize;
+ sk_wmem_queued_add(sk, nskb->truesize);
sk_mem_charge(sk, nskb->truesize);
skb = tcp_send_head(sk);
@@ -3211,7 +3211,7 @@ int tcp_send_synack(struct sock *sk)
tcp_rtx_queue_unlink_and_free(skb, sk);
__skb_header_release(nskb);
tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb);
- sk->sk_wmem_queued += nskb->truesize;
+ sk_wmem_queued_add(sk, nskb->truesize);
sk_mem_charge(sk, nskb->truesize);
skb = nskb;
}
@@ -3436,7 +3436,7 @@ static void tcp_connect_queue_skb(struct
tcb->end_seq += skb->len;
__skb_header_release(skb);
- sk->sk_wmem_queued += skb->truesize;
+ sk_wmem_queued_add(sk, skb->truesize);
sk_mem_charge(sk, skb->truesize);
WRITE_ONCE(tp->write_seq, tcb->end_seq);
tp->packets_out += tcp_skb_pcount(skb);
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -446,7 +446,7 @@ META_COLLECTOR(int_sk_wmem_queued)
*err = -1;
return;
}
- dst->value = sk->sk_wmem_queued;
+ dst->value = READ_ONCE(sk->sk_wmem_queued);
}
META_COLLECTOR(int_sk_fwd_alloc)