From: Eric Dumazet <edumazet@google.com>
Date: Tue, 19 Sep 2017 05:14:24 -0700
Subject: net: sk_buff rbnode reorg
Patch-mainline: v4.15-rc1
Git-commit: bffa72cf7f9df842f0016ba03586039296b4caaf
References: CVE-2018-5391 bsc#1103097
skb->rbnode shares space with skb->next, skb->prev and skb->tstamp
Current uses (TCP receive ofo queue and netem) need to save/restore
tstamp, while skb->dev is either NULL (TCP) or a constant for a given
queue (netem).
Since we plan using an RB tree for TCP retransmit queue to speedup SACK
processing with large BDP, this patch exchanges skb->dev and
skb->tstamp.
This saves some overhead in both TCP and netem.
v2: removes the swtstamp field from struct tcp_skb_cb
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Cc: Wei Wang <weiwan@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Michal Kubecek <mkubecek@suse.cz>
SLE15-SP1: big part of what this commit changes in net/ipv4/tcp_input.c is
merging tcp_skb_cb::swtstamp introduced by commit 98aaa913b4ed ("tcp:
Extend SOF_TIMESTAMPING_RX_SOFTWARE to TCP recvmsg") into skb::tstamp. As
we do not have commit 98aaa913b4ed backported into SLE15-SP1, we do not
need these changes either.
---
include/linux/skbuff.h | 16 ++++++++--------
net/ipv4/tcp_input.c | 9 ++++++---
net/sched/sch_netem.c | 7 ++++---
3 files changed, 18 insertions(+), 14 deletions(-)
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -587,8 +587,12 @@ struct sk_buff {
struct sk_buff *prev;
union {
- ktime_t tstamp;
- u64 skb_mstamp;
+ struct net_device *dev;
+ /* Some protocols might use this space to store information,
+ * while device pointer would be NULL.
+ * UDP receive path is one user.
+ */
+ unsigned long dev_scratch;
};
};
struct rb_node rbnode; /* used in netem & tcp stack */
@@ -596,12 +600,8 @@ struct sk_buff {
struct sock *sk;
union {
- struct net_device *dev;
- /* Some protocols might use this space to store information,
- * while device pointer would be NULL.
- * UDP receive path is one user.
- */
- unsigned long dev_scratch;
+ ktime_t tstamp;
+ u64 skb_mstamp;
};
/*
* This is the control buffer. It is free to use for every
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4426,7 +4426,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
/* In the typical case, we are adding an skb to the end of the list.
* Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup.
*/
- if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) {
+ if (tcp_try_coalesce(sk, tp->ooo_last_skb,
+ skb, &fragstolen)) {
coalesce_done:
tcp_grow_window(sk, skb);
kfree_skb_partial(skb, fragstolen);
@@ -4476,7 +4477,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb1);
goto merge_right;
}
- } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) {
+ } else if (tcp_try_coalesce(sk, skb1,
+ skb, &fragstolen)) {
goto coalesce_done;
}
p = &parent->rb_right;
@@ -4527,7 +4529,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
__skb_pull(skb, hdrlen);
eaten = (tail &&
- tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
+ tcp_try_coalesce(sk, tail,
+ skb, fragstolen)) ? 1 : 0;
tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
if (!eaten) {
__skb_queue_tail(&sk->sk_receive_queue, skb);
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -146,7 +146,6 @@ struct netem_sched_data {
*/
struct netem_skb_cb {
psched_time_t time_to_send;
- ktime_t tstamp_save;
};
@@ -561,7 +560,6 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch,
}
cb->time_to_send = now + delay;
- cb->tstamp_save = skb->tstamp;
++q->counter;
tfifo_enqueue(skb, sch);
} else {
@@ -629,7 +627,10 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
qdisc_qstats_backlog_dec(sch, skb);
skb->next = NULL;
skb->prev = NULL;
- skb->tstamp = netem_skb_cb(skb)->tstamp_save;
+ /* skb->dev shares skb->rbnode area,
+ * we need to restore its value.
+ */
+ skb->dev = qdisc_dev(sch);
#ifdef CONFIG_NET_CLS_ACT
/*