Blob Blame History Raw
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 2 Feb 2023 09:41:00 +0000
Subject: [PATCH] raw: use net_hash_mix() in hash function
References: bsc#1012628
Patch-mainline: 6.2.11
Git-commit: 6579f5bacc2c4cbc5ef6abb45352416939d1f844

[ Upstream commit 6579f5bacc2c4cbc5ef6abb45352416939d1f844 ]

Some applications seem to rely on RAW sockets.

If they use private netns, we can avoid piling all RAW
sockets bound to a given protocol into a single bucket.

Also place (struct raw_hashinfo).lock into its own
cache line to limit false sharing.

Alternative would be to have per-netns hashtables,
but this seems too expensive for most netns
where RAW sockets are not used.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Stable-dep-of: 0a78cf7264d2 ("raw: Fix NULL deref in raw_get_next().")
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Jiri Slaby <jslaby@suse.cz>
---
 include/net/raw.h | 13 +++++++++++--
 net/ipv4/raw.c    | 13 +++++++------
 net/ipv6/raw.c    |  4 ++--
 3 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/include/net/raw.h b/include/net/raw.h
index 5e665934..2c004c20 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -15,6 +15,8 @@
 
 #include <net/inet_sock.h>
 #include <net/protocol.h>
+#include <net/netns/hash.h>
+#include <linux/hash.h>
 #include <linux/icmp.h>
 
 extern struct proto raw_prot;
@@ -29,13 +31,20 @@ int raw_local_deliver(struct sk_buff *, int);
 
 int raw_rcv(struct sock *, struct sk_buff *);
 
-#define RAW_HTABLE_SIZE	MAX_INET_PROTOS
+#define RAW_HTABLE_LOG	8
+#define RAW_HTABLE_SIZE	(1U << RAW_HTABLE_LOG)
 
 struct raw_hashinfo {
 	spinlock_t lock;
-	struct hlist_nulls_head ht[RAW_HTABLE_SIZE];
+
+	struct hlist_nulls_head ht[RAW_HTABLE_SIZE] ____cacheline_aligned;
 };
 
+static inline u32 raw_hashfunc(const struct net *net, u32 proto)
+{
+	return hash_32(net_hash_mix(net) ^ proto, RAW_HTABLE_LOG);
+}
+
 static inline void raw_hashinfo_init(struct raw_hashinfo *hashinfo)
 {
 	int i;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 006c1f0e..2a53a0bf 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -93,7 +93,7 @@ int raw_hash_sk(struct sock *sk)
 	struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
 	struct hlist_nulls_head *hlist;
 
-	hlist = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
+	hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)];
 
 	spin_lock(&h->lock);
 	__sk_nulls_add_node_rcu(sk, hlist);
@@ -160,9 +160,9 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
  * RFC 1122: SHOULD pass TOS value up to the transport layer.
  * -> It does. And not only TOS, but all IP header.
  */
-static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
+static int raw_v4_input(struct net *net, struct sk_buff *skb,
+			const struct iphdr *iph, int hash)
 {
-	struct net *net = dev_net(skb->dev);
 	struct hlist_nulls_head *hlist;
 	struct hlist_nulls_node *hnode;
 	int sdif = inet_sdif(skb);
@@ -193,9 +193,10 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
 
 int raw_local_deliver(struct sk_buff *skb, int protocol)
 {
-	int hash = protocol & (RAW_HTABLE_SIZE - 1);
+	struct net *net = dev_net(skb->dev);
 
-	return raw_v4_input(skb, ip_hdr(skb), hash);
+	return raw_v4_input(net, skb, ip_hdr(skb),
+			    raw_hashfunc(net, protocol));
 }
 
 static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
@@ -271,7 +272,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
 	struct sock *sk;
 	int hash;
 
-	hash = protocol & (RAW_HTABLE_SIZE - 1);
+	hash = raw_hashfunc(net, protocol);
 	hlist = &raw_v4_hashinfo.ht[hash];
 
 	rcu_read_lock();
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ada087b5..45b35b5f 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -152,7 +152,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
 	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = saddr + 1;
 
-	hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+	hash = raw_hashfunc(net, nexthdr);
 	hlist = &raw_v6_hashinfo.ht[hash];
 	rcu_read_lock();
 	sk_nulls_for_each(sk, hnode, hlist) {
@@ -338,7 +338,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
 	struct sock *sk;
 	int hash;
 
-	hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+	hash = raw_hashfunc(net, nexthdr);
 	hlist = &raw_v6_hashinfo.ht[hash];
 	rcu_read_lock();
 	sk_nulls_for_each(sk, hnode, hlist) {
-- 
2.35.3